diff --git a/notebooks/api-examples/1-corpus-creation.ipynb b/notebooks/api-examples/1-corpus-creation.ipynb
index e15fa89..4a57d59 100644
--- a/notebooks/api-examples/1-corpus-creation.ipynb
+++ b/notebooks/api-examples/1-corpus-creation.ipynb
@@ -57,7 +57,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "6019e01a",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:26.180562Z",
+ "iopub.status.busy": "2026-05-28T21:48:26.180217Z",
+ "iopub.status.idle": "2026-05-28T21:48:26.236525Z",
+ "shell.execute_reply": "2026-05-28T21:48:26.236257Z"
+ }
+ },
"outputs": [],
"source": [
"import os\n",
@@ -94,7 +101,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "40947545",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:26.237922Z",
+ "iopub.status.busy": "2026-05-28T21:48:26.237823Z",
+ "iopub.status.idle": "2026-05-28T21:48:26.777803Z",
+ "shell.execute_reply": "2026-05-28T21:48:26.776609Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -175,7 +189,14 @@
"cell_type": "code",
"execution_count": 3,
"id": "c154dd4b",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:26.782197Z",
+ "iopub.status.busy": "2026-05-28T21:48:26.781914Z",
+ "iopub.status.idle": "2026-05-28T21:48:26.901378Z",
+ "shell.execute_reply": "2026-05-28T21:48:26.900600Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -255,15 +276,28 @@
"cell_type": "code",
"execution_count": 4,
"id": "21facbac",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:26.904584Z",
+ "iopub.status.busy": "2026-05-28T21:48:26.904092Z",
+ "iopub.status.idle": "2026-05-28T21:48:28.240950Z",
+ "shell.execute_reply": "2026-05-28T21:48:28.240002Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
- "=== Your Corpora ===\n",
- "Total corpora found: 52\n",
+ "=== Your Corpora ===\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total corpora found: 54\n",
"\n",
"\n",
"AI Research Papers\n",
diff --git a/notebooks/api-examples/9-structured-output-multi-step.ipynb b/notebooks/api-examples/10-structured-output-multi-step.ipynb
similarity index 66%
rename from notebooks/api-examples/9-structured-output-multi-step.ipynb
rename to notebooks/api-examples/10-structured-output-multi-step.ipynb
index 2ec8f54..10643f8 100644
--- a/notebooks/api-examples/9-structured-output-multi-step.ipynb
+++ b/notebooks/api-examples/10-structured-output-multi-step.ipynb
@@ -5,7 +5,7 @@
"id": "colab-badge",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -27,7 +27,7 @@
"4. Parse structured output events and step transition events\n",
"5. Combine both features to build a classifier-router pattern\n",
"\n",
- "> **Related: see notebook 13 for sequential pipelines, conditional gates, and `reentry_step`.** This notebook covers the **classifier-router fan-out** pattern — one classifier branches to one of N terminal handlers. Notebook 13 covers the complementary **plan-then-execute pipeline** (each phase chains forward), conditional gating that skips heavy phases, and `reentry_step` for multi-turn follow-up flows. Read both for the full step-orchestration picture."
+ "> **Related: see notebook 14 for sequential pipelines, conditional gates, and `reentry_step`.** This notebook covers the **classifier-router fan-out** pattern — one classifier branches to one of N terminal handlers. Notebook 14 covers the complementary **plan-then-execute pipeline** (each phase chains forward), conditional gating that skips heavy phases, and `reentry_step` for multi-turn follow-up flows. Read both for the full step-orchestration picture."
]
},
{
@@ -64,7 +64,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "setup-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:38.998074Z",
+ "iopub.status.busy": "2026-05-28T22:19:38.997797Z",
+ "iopub.status.idle": "2026-05-28T22:19:39.064276Z",
+ "shell.execute_reply": "2026-05-28T22:19:39.064036Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -99,7 +106,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "93c74e89",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:39.065504Z",
+ "iopub.status.busy": "2026-05-28T22:19:39.065411Z",
+ "iopub.status.idle": "2026-05-28T22:19:39.067608Z",
+ "shell.execute_reply": "2026-05-28T22:19:39.067413Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent).\n",
@@ -123,7 +137,14 @@
"cell_type": "code",
"execution_count": 3,
"id": "helpers",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:39.068623Z",
+ "iopub.status.busy": "2026-05-28T22:19:39.068546Z",
+ "iopub.status.idle": "2026-05-28T22:19:39.071858Z",
+ "shell.execute_reply": "2026-05-28T22:19:39.071668Z"
+ }
+ },
"outputs": [],
"source": [
"def chat_with_agent(agent_key, session_key, message, show_events=False):\n",
@@ -197,13 +218,20 @@
"cell_type": "code",
"execution_count": 4,
"id": "create-extraction-agent",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:39.072890Z",
+ "iopub.status.busy": "2026-05-28T22:19:39.072812Z",
+ "iopub.status.idle": "2026-05-28T22:19:43.327158Z",
+ "shell.execute_reply": "2026-05-28T22:19:43.325930Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Research Entity Extractor' (key: agt_research_entity_extractor_dcee)\n"
+ "Created agent 'Research Entity Extractor' (key: agt_research_entity_extractor_0a4a)\n"
]
}
],
@@ -291,13 +319,20 @@
"cell_type": "code",
"execution_count": 5,
"id": "create-session",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:43.331718Z",
+ "iopub.status.busy": "2026-05-28T22:19:43.331337Z",
+ "iopub.status.idle": "2026-05-28T22:19:43.503093Z",
+ "shell.execute_reply": "2026-05-28T22:19:43.501502Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session created: ase_extraction_demo_20260506-063044_2382\n"
+ "Session created: ase_extraction_demo_20260528-151943_055d\n"
]
}
],
@@ -321,7 +356,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "test-query-1",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:43.507735Z",
+ "iopub.status.busy": "2026-05-28T22:19:43.507268Z",
+ "iopub.status.idle": "2026-05-28T22:19:46.628473Z",
+ "shell.execute_reply": "2026-05-28T22:19:46.627918Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -331,29 +373,26 @@
"--- Agent Events ---\n",
" Tool call: research_search\n",
" Tool response: research_search\n",
- " Structured output (research_extraction): {\"topics\": [\"RAG\", \"language model hallucinations\", \"hallucination detection in AI\"], \"techniques\": [\"MiniCheck\", \"AlignScore\", \"RAGTruth\", \"self reflection\", \"fine-grained hallucination detection\"], \n",
+ " Structured output (research_extraction): {\"topics\": [\"RAG\", \"language models\", \"hallucinations\", \"factual consistency\"], \"techniques\": [\"Retrieval-Augmented Generation (RAG)\"], \"answer\": \"RAG, or Retrieval-Augmented Generation, is a techniqu\n",
"---\n",
"\n",
"Structured Result:\n",
"{\n",
" \"topics\": [\n",
" \"RAG\",\n",
- " \"language model hallucinations\",\n",
- " \"hallucination detection in AI\"\n",
+ " \"language models\",\n",
+ " \"hallucinations\",\n",
+ " \"factual consistency\"\n",
" ],\n",
" \"techniques\": [\n",
- " \"MiniCheck\",\n",
- " \"AlignScore\",\n",
- " \"RAGTruth\",\n",
- " \"self reflection\",\n",
- " \"fine-grained hallucination detection\"\n",
+ " \"Retrieval-Augmented Generation (RAG)\"\n",
" ],\n",
- " \"answer\": \"RAG employs techniques such as MiniCheck and AlignScore to detect and reduce hallucinations in language models. Additionally, the use of retrieval-augmented generation (RAG) and benchmarks like RAGTruth helps improve accuracy by grounding generations in reliable sources.\",\n",
+ " \"answer\": \"RAG, or Retrieval-Augmented Generation, is a technique that incorporates knowledge retrieval to support generation tasks. This method helps in reducing hallucinations by ensuring that language models refer to non-parametric memory or external documents during generation tasks. The method involves retrieving relevant information before crafting a response, which can potentially enhance factual consistency in the output of language models.\",\n",
" \"confidence\": \"high\"\n",
"}\n",
"\n",
- "Topics: ['RAG', 'language model hallucinations', 'hallucination detection in AI']\n",
- "Techniques: ['MiniCheck', 'AlignScore', 'RAGTruth', 'self reflection', 'fine-grained hallucination detection']\n",
+ "Topics: ['RAG', 'language models', 'hallucinations', 'factual consistency']\n",
+ "Techniques: ['Retrieval-Augmented Generation (RAG)']\n",
"Confidence: high\n"
]
}
@@ -380,7 +419,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "test-query-2",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:46.631044Z",
+ "iopub.status.busy": "2026-05-28T22:19:46.630826Z",
+ "iopub.status.idle": "2026-05-28T22:19:52.553215Z",
+ "shell.execute_reply": "2026-05-28T22:19:52.552134Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -390,22 +436,22 @@
"--- Agent Events ---\n",
" Tool call: research_search\n",
" Tool response: research_search\n",
- " Structured output (research_extraction): {\"topics\": [\"dense retrieval models\", \"sparse retrieval methods\", \"information retrieval\"], \"techniques\": [\"BM25\", \"dense passage retrieval\", \"word overlap-based retrieval\"], \"answer\": \"Dense retrieva\n",
+ " Structured output (research_extraction): {\"topics\": [\"dense retrieval\", \"sparse retrieval\", \"BM25\", \"open-domain question answering\"], \"techniques\": [\"dense passage retrieval\", \"BM25 framework\"], \"answer\": \"Dense retrieval models, such as th\n",
"---\n",
"\n",
"Structured Result:\n",
"{\n",
" \"topics\": [\n",
- " \"dense retrieval models\",\n",
- " \"sparse retrieval methods\",\n",
- " \"information retrieval\"\n",
+ " \"dense retrieval\",\n",
+ " \"sparse retrieval\",\n",
+ " \"BM25\",\n",
+ " \"open-domain question answering\"\n",
" ],\n",
" \"techniques\": [\n",
- " \"BM25\",\n",
" \"dense passage retrieval\",\n",
- " \"word overlap-based retrieval\"\n",
+ " \"BM25 framework\"\n",
" ],\n",
- " \"answer\": \"Dense retrieval models like dense passage retrieval are designed to capture semantic meanings and are often better suited for tasks requiring high accuracy on open-domain question answering. Sparse retrieval methods like BM25, which rely on exact word matching, may perform better in contexts that are heavily entity-centric, such as some fact extraction and verification tasks. For instance, BM25 performs well in the FEVER dataset due to its word overlap-based nature, while dense passage retrieval shows advantages in broader, semantic-based queries.\",\n",
+ " \"answer\": \"Dense retrieval models, such as those used in dense passage retrieval, typically utilize dense vector representations for matching queries and documents, which can capture semantic similarities beyond simple keyword overlap. They are often considered more effective for open-domain question answering tasks compared to sparse retrieval models like BM25, which rely primarily on exact term matching. BM25 is noted for performing well on tasks with an entity-centric focus due to its reliance on word overlap. However, dense retrieval methods have demonstrated significant improvements in tasks requiring deeper semantic understanding and broader contextual integration.\",\n",
" \"confidence\": \"high\"\n",
"}\n"
]
@@ -448,13 +494,20 @@
"cell_type": "code",
"execution_count": 8,
"id": "create-multistep-agent",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:52.557980Z",
+ "iopub.status.busy": "2026-05-28T22:19:52.557641Z",
+ "iopub.status.idle": "2026-05-28T22:19:56.181701Z",
+ "shell.execute_reply": "2026-05-28T22:19:56.180192Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Research Assistant Router' (key: agt_research_assistant_router_116c)\n"
+ "Created agent 'Research Assistant Router' (key: agt_research_assistant_router_b292)\n"
]
}
],
@@ -607,7 +660,14 @@
"cell_type": "code",
"execution_count": 9,
"id": "test-research",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:56.186365Z",
+ "iopub.status.busy": "2026-05-28T22:19:56.185992Z",
+ "iopub.status.idle": "2026-05-28T22:20:09.743558Z",
+ "shell.execute_reply": "2026-05-28T22:20:09.743089Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -617,38 +677,38 @@
"TEST 1: Research query (expect: classifier -> research_handler)\n",
"============================================================\n",
"User: What are the key innovations in the original transformer architecture's attention mechanism?\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"--- Agent Events ---\n",
- " Structured output (intent_classification): {\"intent\": \"research\", \"reasoning\": \"The question focuses on understanding the theoretical concepts and innovations behind the attention mechanism in the original transformer architecture, which falls\n",
+ " Structured output (intent_classification): {\"intent\": \"research\", \"reasoning\": \"The question asks about the key innovations in the attention mechanism of the original transformer architecture, which involves understanding theoretical concepts \n",
" Step transition: classifier -> research_handler\n",
" Tool call: research_search\n",
" Tool response: research_search\n",
- " Agent output: The original Transformer architecture, as introduced in the seminal paper \"Attention Is All You Need\" by Vaswani et al. (2017), incorporated several key innovations in its attention mechanism that sig...\n",
+ " Agent output: The original transformer architecture, introduced by Vaswani et al. in the seminal paper \"Attention Is All You Need,\" presents several key innovations in its attention mechanism that distinguish it fr...\n",
"---\n",
"\n",
"Response:\n",
- "The original Transformer architecture, as introduced in the seminal paper \"Attention Is All You Need\" by Vaswani et al. (2017), incorporated several key innovations in its attention mechanism that significantly advanced the field of machine learning and natural language processing. Here are the primary innovations:\n",
+ "The original transformer architecture, introduced by Vaswani et al. in the seminal paper \"Attention Is All You Need,\" presents several key innovations in its attention mechanism that distinguish it from previous approaches. Here are the major contributions:\n",
"\n",
- "1. **Self-Attention Mechanism**: \n",
- " - The Transformer architecture introduced the self-attention mechanism, which allows each position in the input sequence to focus on all other positions. This mechanism is crucial because it enables the model to effectively capture dependencies between elements in a sequence, regardless of their distance from each other. This contrasts sharply with the limitations of recurrent neural networks (RNNs), which struggle with long-range dependencies due to their sequential nature.\n",
+ "1. **Self-Attention Mechanism**: The transformer uses self-attention (also known as intra-attention) to compute representations of input and output sequences. This mechanism allows the model to consider different positions within a single sequence, calculating relationships among them to derive a contextual understanding. Self-attention enables the model to capture long-range dependencies without relying on the localized convolution or recurrent operations typically used in earlier architectures like RNNs or LSTMs.\n",
"\n",
- "2. **Multi-Head Attention**:\n",
- " - The innovation of multi-head attention allows the model to jointly attend to information from different representation subspaces at different positions. In practice, this means that the Transformer can apply multiple sets of query, key, and value projections and concatenate the results. This enhances the model's ability to focus on different features of the input sequence simultaneously, significantly improving performance over single-head attention mechanisms.\n",
+ "2. **Scalability and Parallelization**: By relying entirely on attention mechanisms, the transformer architecture dispenses with recurrence and convolutions. This design choice allows the model to be parallelized and scaled more efficiently, as dependencies are not sequential, unlike in RNNs. Consequently, data can be processed faster, leading to improvements in training times and scalability on larger datasets.\n",
"\n",
- "3. **Positional Encoding**:\n",
- " - Since the Transformer eschews the sequential processing typical of RNNs, it lacks inherent sequence order information. To address this, Vaswani et al. introduced positional encoding, adding a vector to each input embedding to provide information about the position of the word in the sequence. This addition allows the model to maintain the ability to understand the sequence order.\n",
+ "3. **Multi-Head Attention**: This innovation involves running several self-attention layers in parallel, known as heads. Each head processes a different linear projection of the same input, focusing on various parts of the data and allowing the model to learn features across multiple representation subspaces. The outputs of these parallel attention layers are concatenated and linearly transformed, providing richer and more nuanced representations.\n",
"\n",
- "4. **Parallelization**:\n",
- " - Unlike RNNs, which require processing inputs sequentially, the use of self-attention mechanisms allows for the parallelization of operations. This results in a significant speed-up in both training and inference times, making the Transformer architecture particularly suited for handling large datasets and complex tasks efficiently.\n",
+ "4. **Positional Embeddings**: Since the self-attention mechanism treats the input sequence as a bag of words, it loses the sequence ordering information. To address this, transformers introduce positional encodings, which utilize sinusoidal functions added to input embeddings to preserve the order of tokens in a sequence. This inclusion allows the model to learn and make predictions based on word order.\n",
"\n",
- "5. **Simplified Architecture**:\n",
- " - The architecture of the Transformer dispenses with both recurrence and convolution entirely, relying solely on attention. This simplification leads to fewer parameters than RNNs and Convolutional Neural Networks (CNNs) for similar problem sizes, making it not only powerful but also computationally efficient.\n",
+ "5. **Layer Normalization and Residual Connections**: Each layer of the transformer includes residual connections around the self-attention module and the subsequent feed-forward sub-layers, coupled with layer normalization. These techniques help stabilize the training process, improve convergence rates, and allow the model to achieve deeper architectures without suffering from vanishing gradient issues.\n",
"\n",
- "These innovations collectively enabled the Transformer to surpass previous architectures in terms of parallelizability and effectiveness, particularly for tasks involving language understanding and generation. The model's reliance on attention mechanisms rather than recurrence or convolution was a fundamental shift that has since influenced countless subsequent works in deep learning and AI.\n",
+ "These innovations collectively contribute to the transformer's ability to model complex dependencies and achieve superior performance on various tasks in natural language processing (NLP). The attention mechanism is central to the architecture's success, enabling highly effective modeling of sequences in parallel while maintaining the ability to capture both short and long-range dependencies. \n",
"\n",
- "Referenced Papers:\n",
- "- Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, L., & Polosukhin, I. (2017). Attention Is All You Need. *Advances in Neural Information Processing Systems*, 30, 5998-6008. [Link to Paper](https://arxiv.org/abs/1706.03762)\n"
+ "For further details, refer to the original paper by Vaswani et al. (2017): [\"Attention Is All You Need\"](https://arxiv.org/abs/1706.03762).\n"
]
}
],
@@ -678,7 +738,14 @@
"cell_type": "code",
"execution_count": 10,
"id": "test-implementation",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:09.746975Z",
+ "iopub.status.busy": "2026-05-28T22:20:09.746735Z",
+ "iopub.status.idle": "2026-05-28T22:20:18.330507Z",
+ "shell.execute_reply": "2026-05-28T22:20:18.329701Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -688,56 +755,48 @@
"TEST 2: Implementation query (expect: classifier -> implementation_handler)\n",
"============================================================\n",
"User: How do I configure hybrid search with reranking in Vectara's API?\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"--- Agent Events ---\n",
- " Structured output (intent_classification): {\"intent\": \"implementation\", \"reasoning\": \"The question is asking for specific guidance on setting up a feature (hybrid search with reranking) using Vectara's API, indicating a need for practical impl\n",
+ " Structured output (intent_classification): {\"intent\": \"implementation\", \"reasoning\": \"The question is asking about configuring a specific feature of Vectara's API, which involves setup and usage specifics, characteristic of an implementation q\n",
" Step transition: classifier -> implementation_handler\n",
" Tool call: docs_search\n",
" Tool response: docs_search\n",
- " Agent output: To configure hybrid search with reranking in Vectara's API, you can follow these steps:\n",
- "\n",
- "1. **Hybrid Search Configuration:**\n",
- " - When performing a query, adjust the `lexical_interpolation` value to c...\n",
+ " Agent output: To configure hybrid search with reranking in Vectara's API, you'll need to leverage Vectara’s capability to perform both lexical (keyword-based) and dense (semantic) retrieval along with reranking mec...\n",
"---\n",
"\n",
"Response:\n",
- "To configure hybrid search with reranking in Vectara's API, you can follow these steps:\n",
+ "To configure hybrid search with reranking in Vectara's API, you'll need to leverage Vectara’s capability to perform both lexical (keyword-based) and dense (semantic) retrieval along with reranking mechanisms. Here’s a step-by-step guide to help you set it up:\n",
+ "\n",
+ "### Step-by-Step Setup\n",
"\n",
- "1. **Hybrid Search Configuration:**\n",
- " - When performing a query, adjust the `lexical_interpolation` value to configure hybrid search. This parameter balances the influence of neural search and lexical (keyword-based) search.\n",
- " - Set `lexical_interpolation` between `0.0` (purely neural) and `1.0` (purely lexical). Optimal results are often found between `0.01` and `0.1`.\n",
+ "1. **Understanding Hybrid Search**: Hybrid search in Vectara combines BM25 (a lexical search algorithm) and dense vector retrieval. This blend allows you to benefit from both keyword search accuracy and semantic understanding.\n",
"\n",
- " Example API Call for Hybrid Search:\n",
- " ```json\n",
- " {\n",
- " \"query\": {\n",
- " \"text\": \"your search query\",\n",
- " \"lexical_interpolation\": 0.05\n",
- " }\n",
- " }\n",
- " ```\n",
+ "2. **Enabling Hybrid Search**:\n",
+ " - Use the `Lambda` option to balance between neural search and keyword-based search. This allows you to configure the level of influence each component should have in the search process.\n",
"\n",
- "2. **Enabling Reranking:**\n",
- " - Use Maximal Marginal Relevance (MMR) reranking to enhance result diversity and relevance. This reduces redundancy while ensuring high-quality results.\n",
- " - It is essential to ensure your API calls incorporate reranking methods such as MMR if supported.\n",
+ "3. **Reranking Configuration**:\n",
+ " - You can use Maximal Marginal Relevance (MMR) reranking to enhance the diversity of search results. This algorithm helps reduce redundancy while maintaining high relevance.\n",
+ " - Implement reranking by configuring your search queries to include a reranker, improving the quality of the returned results by reordering them based on additional criteria beyond the initial retrieval phase.\n",
"\n",
- " Example snippet for enabling MMR reranking (pseudo-code example):\n",
- " ```json\n",
- " {\n",
- " \"query\": {\n",
- " \"text\": \"your search query\",\n",
- " \"reranking\": {\n",
- " \"enabled\": true,\n",
- " \"type\": \"MMR\"\n",
- " }\n",
- " }\n",
- " }\n",
- " ```\n",
+ "### Example API Usage\n",
"\n",
- "Refer to the Vectara documentation for detailed instructions on enabling and configuring reranking within your API calls. Additionally, you can explore their [Configure Queries Guide](https://docs.vectara.com/docs/console-ui/configure-queries) for more in-depth examples and best practices.\n",
+ "When making a search API request, you might need to specify parameters or a configuration that enables both hybrid search and reranking. An example API call is not provided in the search results, but you should look for points in the API where these settings can be applied.\n",
"\n",
- "These configuration settings allow you to effectively utilize Vectara’s capabilities for combining neural and lexical search techniques with reranking methodologies, enhancing the quality and relevance of search results.\n"
+ "### Documentation and Resources\n",
+ "\n",
+ "For more details on configuring queries and reranking in Vectara, you may refer to the following resources:\n",
+ "\n",
+ "- [Configure Queries](https://docs.vectara.com/docs/console-ui/configure-queries)\n",
+ "- [Python SDK Reranker Guide](https://docs.vectara.com/docs/sdk/python/rerankers)\n",
+ "\n",
+ "This setup should enable you to fine-tune your search capabilities using Vectara's APIs for optimal results. If you need more specific code examples or request formats, consulting the API documentation would be highly beneficial.\n"
]
}
],
@@ -767,7 +826,14 @@
"cell_type": "code",
"execution_count": 11,
"id": "test-comparison",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:18.336393Z",
+ "iopub.status.busy": "2026-05-28T22:20:18.335973Z",
+ "iopub.status.idle": "2026-05-28T22:20:28.391021Z",
+ "shell.execute_reply": "2026-05-28T22:20:28.390246Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -777,56 +843,39 @@
"TEST 3: Comparison query (expect: classifier -> comparison_handler)\n",
"============================================================\n",
"User: How does semantic search compare to keyword-based search for finding relevant documents?\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"--- Agent Events ---\n",
- " Structured output (intent_classification): {\"intent\": \"comparison\", \"reasoning\": \"The user's question explicitly asks for a comparison between semantic search and keyword-based search in terms of their effectiveness for finding relevant docume\n",
+ " Structured output (intent_classification): {\"intent\": \"comparison\", \"reasoning\": \"The user's question is directly asking for a comparison between semantic search and keyword-based search in terms of their effectiveness in finding relevant docu\n",
" Step transition: classifier -> comparison_handler\n",
- " Tool call: docs_search\n",
" Tool call: research_search\n",
- " Tool response: docs_search\n",
+ " Tool call: docs_search\n",
" Tool response: research_search\n",
- " Agent output: Semantic search and keyword-based search represent two different approaches to document retrieval, each with its own strengths and limitations. Here's a comparative analysis based on both academic res...\n",
+ " Tool response: docs_search\n",
+ " Agent output: Semantic search and keyword-based search are two approaches used for retrieving relevant documents, each with its strengths and limitations. Here's a comparison based on both research literature and p...\n",
"---\n",
"\n",
"Response:\n",
- "Semantic search and keyword-based search represent two different approaches to document retrieval, each with its own strengths and limitations. Here's a comparative analysis based on both academic research and practical documentation:\n",
- "\n",
- "### Keyword-Based Search\n",
- "\n",
- "**Methodology:**\n",
- "Keyword-based search relies on exact matches between the search query and the words contained in documents. It is highly dependent on the presence of specific words and phrases.\n",
- "\n",
- "**Advantages:**\n",
- "- **Simplicity:** The search mechanism is easy to implement and understand. \n",
- "- **Efficiency for Specific Queries:** Works well for queries with specific terms and less need for contextual understanding.\n",
- "\n",
- "**Limitations:**\n",
- "- **Limited Contextual Understanding:** It may not capture the meaning if synonyms or related terms are used.\n",
- "- **Sensitivity to Word Choice:** Variations in phrasing can lead to missed results.\n",
- "\n",
- "According to research papers, keyword-based search can be restrictive, as it hinges on middle-layer translations between machine and linguistic tasks without fully understanding the meaning ([ZLL+18]). \n",
+ "Semantic search and keyword-based search are two approaches used for retrieving relevant documents, each with its strengths and limitations. Here's a comparison based on both research literature and practical documentation:\n",
"\n",
"### Semantic Search\n",
+ "- **Understanding Context**: Semantic search leverages natural language processing and machine learning to understand the context and intent behind a query. This allows it to retrieve documents that are contextually relevant, even if they do not contain the exact keywords used in the query. For example, semantic search can understand synonyms and related terms ([Vectara Docs](https://docs.vectara.com/docs/sdk/python/query)).\n",
+ "- **Performance on Semantics**: Studies using models like BERT and SimCSE-BERT demonstrate that semantic search can significantly outperform traditional keyword search in tasks involving semantic textual similarity and natural language understanding ([BEIR Benchmark](https://arxiv.org/abs/2104.08663)).\n",
+ "- **Hybrid Models**: Tools like those from Vectara offer configurations to balance between pure semantic and keyword-based search using parameters like Lambda, allowing a mix of both approaches for improved accuracy ([Vectara Docs on Hybrid Search](https://docs.vectara.com/docs/console-ui/configure-queries)).\n",
"\n",
- "**Methodology:**\n",
- "Semantic search measures the contextual meaning of words, rather than just matching spelling. It uses machine learning models and Natural Language Processing (NLP) to determine the relationship between words and their context.\n",
- "\n",
- "**Advantages:**\n",
- "- **Contextual Accuracy:** Accurately interprets the meaning of queries and content, often yielding more relevant results.\n",
- "- **Robust to Synonyms:** Handles variations in language and can understand synonyms and related concepts.\n",
- "\n",
- "**Limitations:**\n",
- "- **Complexity:** Requires sophisticated algorithms and significant computational power.\n",
- "- **Implementation Cost:** Generally more resource-intensive to develop and maintain.\n",
- "\n",
- "Research indicates that semantic search can combine neural language models to improve understanding of user intent ([VSP⁺17]). This allows retrieval systems to fetch more semantically relevant documents, thus improving result quality ([SS20]).\n",
- "\n",
- "### Hybrid Approaches\n",
- "Some platforms implement a hybrid approach, achieving balance between keyword precision and semantic understanding. For instance, Vectara offers a hybrid search model where lexical (keyword-based) and semantic searches are mixed to suit different use cases ([Advanced Single Corpus Query](https://docs.vectara.com/docs/rest-api/query-corpus)).\n",
+ "### Keyword-Based Search\n",
+ "- **Exact Matches**: This method focuses on matching search queries with exact keywords in documents. It is efficient for straightforward lookups where precision in matching specific terms is paramount ([Vectara Docs](https://docs.vectara.com/docs/build/data-ingestion)).\n",
+ "- **Simplicity and Efficiency**: Keyword-based search is simpler and often faster, as it doesn't require the computational complexity involved in understanding natural language. This makes it suitable for environments with limited computational resources or databases where precision is more critical than context.\n",
+ "- **Limitations on Understanding**: Unlike semantic search, keyword-based search lacks the capability to understand the underlying intent or context behind a query, making it less effective for complex queries or those that require context comprehension ([Vectara Advanced Query](https://docs.vectara.com/docs/rest-api/query-corpus)).\n",
"\n",
"### Conclusion\n",
- "Semantic search provides enhanced capabilities over keyword-based search by understanding context and meaning, leading to more accurate results. However, keyword-based search remains relevant for contexts where precise term matching is crucial. A hybrid approach may serve scenarios where both precision and context are required. As research in NLP and machine learning progresses, the benefits of semantic search will continue to grow, making it an increasingly critical tool for information retrieval.\n"
+ "In summary, semantic search provides a more sophisticated approach to understanding and retrieving contextually relevant documents, outperforming keyword-based search in scenarios requiring nuanced interpretation. However, keyword-based search remains valuable for its speed and simplicity in scenarios where exact term matching is sufficient. Implementations combining both methods can offer the benefits of both, allowing users to adjust the balance depending on specific retrieval needs, as seen with Vectara's hybrid search capabilities.\n"
]
}
],
@@ -866,13 +915,26 @@
"cell_type": "code",
"execution_count": 12,
"id": "cleanup-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:28.396071Z",
+ "iopub.status.busy": "2026-05-28T22:20:28.395653Z",
+ "iopub.status.idle": "2026-05-28T22:20:32.214892Z",
+ "shell.execute_reply": "2026-05-28T22:20:32.213337Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted agent: Research Entity Extractor\n",
+ "Deleted agent: Research Entity Extractor\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"Deleted agent: Research Assistant Router\n"
]
}
diff --git a/notebooks/api-examples/10-agent-schedules.ipynb b/notebooks/api-examples/11-agent-schedules.ipynb
similarity index 86%
rename from notebooks/api-examples/10-agent-schedules.ipynb
rename to notebooks/api-examples/11-agent-schedules.ipynb
index 4278d4b..47dda88 100644
--- a/notebooks/api-examples/10-agent-schedules.ipynb
+++ b/notebooks/api-examples/11-agent-schedules.ipynb
@@ -5,7 +5,7 @@
"id": "colab-badge",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -63,7 +63,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "setup",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:35.231817Z",
+ "iopub.status.busy": "2026-05-28T22:20:35.231316Z",
+ "iopub.status.idle": "2026-05-28T22:20:35.294881Z",
+ "shell.execute_reply": "2026-05-28T22:20:35.294600Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -100,7 +107,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "1903363f",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:35.296271Z",
+ "iopub.status.busy": "2026-05-28T22:20:35.296154Z",
+ "iopub.status.idle": "2026-05-28T22:20:35.298720Z",
+ "shell.execute_reply": "2026-05-28T22:20:35.298496Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent).\n",
@@ -136,13 +150,20 @@
"cell_type": "code",
"execution_count": 3,
"id": "step1-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:35.299979Z",
+ "iopub.status.busy": "2026-05-28T22:20:35.299880Z",
+ "iopub.status.idle": "2026-05-28T22:20:39.177117Z",
+ "shell.execute_reply": "2026-05-28T22:20:39.176088Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Research Digest Generator' (key: agt_research_digest_generator_1d99)\n"
+ "Created agent 'Research Digest Generator' (key: agt_research_digest_generator_81da)\n"
]
}
],
@@ -213,14 +234,21 @@
"cell_type": "code",
"execution_count": 4,
"id": "step2-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:39.181600Z",
+ "iopub.status.busy": "2026-05-28T22:20:39.181343Z",
+ "iopub.status.idle": "2026-05-28T22:20:39.625339Z",
+ "shell.execute_reply": "2026-05-28T22:20:39.624235Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Schedule created: Daily Research Digest\n",
- " Key: asc_daily_research_digest_3d4e\n",
+ " Key: asc_daily_research_digest_a99e\n",
" Cron: 0 9 * * 1-5\n",
" Enabled: False\n",
" Max executions to keep: 20\n"
@@ -285,14 +313,21 @@
"cell_type": "code",
"execution_count": 5,
"id": "step3-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:39.631171Z",
+ "iopub.status.busy": "2026-05-28T22:20:39.630454Z",
+ "iopub.status.idle": "2026-05-28T22:20:39.822538Z",
+ "shell.execute_reply": "2026-05-28T22:20:39.821077Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Schedule created: Periodic Research Check\n",
- " Key: asc_periodic_research_check_6e65\n",
+ " Key: asc_periodic_research_check_c53e\n",
" Interval: PT6H\n",
" Enabled: False\n"
]
@@ -353,7 +388,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "step4-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:39.826923Z",
+ "iopub.status.busy": "2026-05-28T22:20:39.826429Z",
+ "iopub.status.idle": "2026-05-28T22:20:42.315225Z",
+ "shell.execute_reply": "2026-05-28T22:20:42.314553Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -361,14 +403,14 @@
"text": [
"Found 2 schedule(s):\n",
"\n",
- " asc_periodic_research_check_6e65\n",
- " Key: asc_periodic_research_check_6e65\n",
+ " asc_periodic_research_check_c53e\n",
+ " Key: asc_periodic_research_check_c53e\n",
" Type: interval (PT6H)\n",
" Enabled: False\n",
" Last execution: Never\n",
"\n",
- " asc_daily_research_digest_3d4e\n",
- " Key: asc_daily_research_digest_3d4e\n",
+ " asc_daily_research_digest_a99e\n",
+ " Key: asc_daily_research_digest_a99e\n",
" Type: interval (PT1H)\n",
" Enabled: False\n",
" Last execution: Never\n",
@@ -435,14 +477,21 @@
"cell_type": "code",
"execution_count": 7,
"id": "step5-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:42.318699Z",
+ "iopub.status.busy": "2026-05-28T22:20:42.318258Z",
+ "iopub.status.idle": "2026-05-28T22:20:42.616398Z",
+ "shell.execute_reply": "2026-05-28T22:20:42.615487Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Schedule updated: asc_daily_research_digest_3d4e\n",
- " Key: asc_daily_research_digest_3d4e\n",
+ "Schedule updated: asc_daily_research_digest_a99e\n",
+ " Key: asc_daily_research_digest_a99e\n",
" Enabled: True\n",
" New cron: 0 * * * *\n",
" Description: Generates a research digest every hour (demo)\n"
@@ -492,7 +541,14 @@
"cell_type": "code",
"execution_count": 8,
"id": "step6-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:42.621687Z",
+ "iopub.status.busy": "2026-05-28T22:20:42.621087Z",
+ "iopub.status.idle": "2026-05-28T22:20:42.768396Z",
+ "shell.execute_reply": "2026-05-28T22:20:42.766666Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -547,7 +603,14 @@
"cell_type": "code",
"execution_count": 9,
"id": "step7-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:42.773830Z",
+ "iopub.status.busy": "2026-05-28T22:20:42.773239Z",
+ "iopub.status.idle": "2026-05-28T22:20:42.780942Z",
+ "shell.execute_reply": "2026-05-28T22:20:42.780091Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -605,14 +668,33 @@
"cell_type": "code",
"execution_count": 10,
"id": "cleanup-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:42.783817Z",
+ "iopub.status.busy": "2026-05-28T22:20:42.783555Z",
+ "iopub.status.idle": "2026-05-28T22:20:44.965926Z",
+ "shell.execute_reply": "2026-05-28T22:20:44.964388Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted schedule: Daily Research Digest\n",
- "Deleted schedule: Periodic Research Check\n",
+ "Deleted schedule: Daily Research Digest\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleted schedule: Periodic Research Check\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"Deleted agent: Research Digest Generator\n"
]
}
diff --git a/notebooks/api-examples/11-web-get-tool.ipynb b/notebooks/api-examples/12-web-get-tool.ipynb
similarity index 86%
rename from notebooks/api-examples/11-web-get-tool.ipynb
rename to notebooks/api-examples/12-web-get-tool.ipynb
index 86bd584..183ff4a 100644
--- a/notebooks/api-examples/11-web-get-tool.ipynb
+++ b/notebooks/api-examples/12-web-get-tool.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -81,7 +81,14 @@
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:47.275780Z",
+ "iopub.status.busy": "2026-05-28T22:20:47.275409Z",
+ "iopub.status.idle": "2026-05-28T22:20:47.337716Z",
+ "shell.execute_reply": "2026-05-28T22:20:47.337449Z"
+ }
+ },
"outputs": [],
"source": [
"import os\n",
@@ -103,7 +110,14 @@
{
"cell_type": "code",
"execution_count": 2,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:47.339153Z",
+ "iopub.status.busy": "2026-05-28T22:20:47.339066Z",
+ "iopub.status.idle": "2026-05-28T22:20:47.341344Z",
+ "shell.execute_reply": "2026-05-28T22:20:47.341105Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent).\n",
@@ -142,13 +156,20 @@
{
"cell_type": "code",
"execution_count": 3,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:47.342513Z",
+ "iopub.status.busy": "2026-05-28T22:20:47.342431Z",
+ "iopub.status.idle": "2026-05-28T22:20:51.421274Z",
+ "shell.execute_reply": "2026-05-28T22:20:51.420013Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Weather Assistant' (key: agt_weather_assistant_c73f)\n"
+ "Created agent 'Weather Assistant' (key: agt_weather_assistant_8fdf)\n"
]
}
],
@@ -210,13 +231,20 @@
{
"cell_type": "code",
"execution_count": 4,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:51.456299Z",
+ "iopub.status.busy": "2026-05-28T22:20:51.456094Z",
+ "iopub.status.idle": "2026-05-28T22:20:51.616708Z",
+ "shell.execute_reply": "2026-05-28T22:20:51.614791Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_weather_demo_20260506-100555_ec6c\n"
+ "Session Created: ase_weather_demo_20260528-152051_74c6\n"
]
}
],
@@ -252,7 +280,14 @@
{
"cell_type": "code",
"execution_count": 5,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:51.622467Z",
+ "iopub.status.busy": "2026-05-28T22:20:51.621787Z",
+ "iopub.status.idle": "2026-05-28T22:20:51.633216Z",
+ "shell.execute_reply": "2026-05-28T22:20:51.632620Z"
+ }
+ },
"outputs": [],
"source": [
"def ask_weather(agent_key, session_key, question, show_events=True, body_preview_chars=300, return_events=False):\n",
@@ -321,23 +356,36 @@
{
"cell_type": "code",
"execution_count": 6,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:20:51.635951Z",
+ "iopub.status.busy": "2026-05-28T22:20:51.635622Z",
+ "iopub.status.idle": "2026-05-28T22:21:00.130756Z",
+ "shell.execute_reply": "2026-05-28T22:21:00.130225Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"User: What's the weather in Tokyo right now?\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[web_get] GET https://geocoding-api.open-meteo.com/v1/search?name=Tokyo&count=1&language=en&format=json\n",
- " -> [web_get] HTTP 200, body: {\"results\":[{\"id\":1850147,\"name\":\"Tokyo\",\"latitude\":35.6895,\"longitude\":139.69171,\"elevation\":44.0,\"feature_code\":\"PPLC\",\"country_code\":\"JP\",\"admin1_id\":1850144,\"timezone\":\"Asia/Tokyo\",\"population\":9733276,\"country_id\":1861060,\"country\":\"Japan\",\"admin1\":\"Tokyo\"}],\"generationtime_ms\":0.6380081}\n",
+ " -> [web_get] HTTP 200, body: {\"results\":[{\"id\":1850147,\"name\":\"Tokyo\",\"latitude\":35.6895,\"longitude\":139.69171,\"elevation\":44.0,\"feature_code\":\"PPLC\",\"country_code\":\"JP\",\"admin1_id\":1850144,\"timezone\":\"Asia/Tokyo\",\"population\":9733276,\"country_id\":1861060,\"country\":\"Japan\",\"admin1\":\"Tokyo\"}],\"generationtime_ms\":0.7586479}\n",
"[web_get] GET https://api.open-meteo.com/v1/forecast?latitude=35.6895&longitude=139.69171&current=temperature_2m,weather_code,wind_speed_10m&temperature_unit=celsius&wind_speed_unit=kmh\n",
- " -> [web_get] HTTP 200, body: {\"latitude\":35.7,\"longitude\":139.6875,\"generationtime_ms\":0.10943412780761719,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":40.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"curren...\n",
+ " -> [web_get] HTTP 200, body: {\"latitude\":35.7,\"longitude\":139.6875,\"generationtime_ms\":0.10097026824951172,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":40.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"curren...\n",
"--------------------------\n",
"\n",
- "Agent: The current temperature in Tokyo is 15.9°C with a gentle breeze blowing at 3.1 km/h. The weather is cloudy right now.\n"
+ "Agent: Right now in Tokyo, the temperature is 23.4°C with a gentle wind speed of 6.6 km/h. The weather is generally clear.\n"
]
}
],
@@ -381,14 +429,27 @@
{
"cell_type": "code",
"execution_count": 7,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:00.133610Z",
+ "iopub.status.busy": "2026-05-28T22:21:00.133397Z",
+ "iopub.status.idle": "2026-05-28T22:21:05.682169Z",
+ "shell.execute_reply": "2026-05-28T22:21:05.681265Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Weather Assistant' (agt_weather_assistant_c73f)\n",
- "Created agent 'Weather Assistant' (key: agt_weather_assistant_c409)\n"
+ "Deleted existing agent 'Weather Assistant' (agt_weather_assistant_8fdf)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Weather Assistant' (key: agt_weather_assistant_00ae)\n"
]
}
],
@@ -417,16 +478,29 @@
{
"cell_type": "code",
"execution_count": 8,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:05.688111Z",
+ "iopub.status.busy": "2026-05-28T22:21:05.687426Z",
+ "iopub.status.idle": "2026-05-28T22:21:18.423958Z",
+ "shell.execute_reply": "2026-05-28T22:21:18.423378Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_weather_demo_locked_20260506-100608_9f2e\n",
+ "Session Created: ase_weather_demo_locked_20260528-152105_7dfb\n",
"\n",
"User: What's the weather in Reykjavik right now?\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[web_get] GET https://geocoding-api.open-meteo.com/v1/search?name=Reykjavik&count=1&language=en&format=json\n",
@@ -434,10 +508,10 @@
" -> [web_get] HTTP 200, body: {\"results\":[{\"id\":3413829,\"name\":\"Reykjavik\",\"latitude\":64.13548,\"longitude\":-21.89541,\"elevation\":37.0,\"feature_code\":\"PPLC\",\"country_code\":\"IS\",\"admin1_id\":3426182,\"admin2_id\":3413831,\"timezone\":\"Atlantic/Reykjavik\",\"population\":118918,\"postcodes\":[\"101\",\"103\",\"104\",\"105\",\"107\",\"108\",\"109\",\"110\",\"...\n",
"[web_get] GET https://api.open-meteo.com/v1/forecast?latitude=64.13548&longitude=-21.89541&current=temperature_2m,weather_code,wind_speed_10m&temperature_unit=celsius&wind_speed_unit=kmh\n",
" headers: {'User-Agent': 'vectara-tutorial/1.0'}\n",
- " -> [web_get] HTTP 200, body: {\"latitude\":64.12922,\"longitude\":-21.883698,\"generationtime_ms\":0.11265277862548828,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":37.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"...\n",
+ " -> [web_get] HTTP 200, body: {\"latitude\":64.12922,\"longitude\":-21.883698,\"generationtime_ms\":0.09715557098388672,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":37.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"...\n",
"--------------------------\n",
"\n",
- "Agent: Right now in Reykjavik, it's 6.4°C with a wind speed of 20.2 km/h. The weather is clear, which corresponds to the weather code for sunny conditions.\n"
+ "Agent: In Reykjavik right now, the temperature is 8.1°C with a gentle breeze blowing at 4.3 km/h. The weather is partly cloudy.\n"
]
}
],
@@ -497,14 +571,27 @@
{
"cell_type": "code",
"execution_count": 9,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:18.428283Z",
+ "iopub.status.busy": "2026-05-28T22:21:18.427794Z",
+ "iopub.status.idle": "2026-05-28T22:21:24.393909Z",
+ "shell.execute_reply": "2026-05-28T22:21:24.392818Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Weather Assistant' (agt_weather_assistant_c409)\n",
- "Created agent 'Weather Assistant' (key: agt_weather_assistant_7065)\n"
+ "Deleted existing agent 'Weather Assistant' (agt_weather_assistant_00ae)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Weather Assistant' (key: agt_weather_assistant_1e95)\n"
]
}
],
@@ -582,16 +669,29 @@
{
"cell_type": "code",
"execution_count": 10,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:24.399148Z",
+ "iopub.status.busy": "2026-05-28T22:21:24.398297Z",
+ "iopub.status.idle": "2026-05-28T22:21:31.523275Z",
+ "shell.execute_reply": "2026-05-28T22:21:31.522308Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_weather_demo_specialized_20260506-100624_6f5c\n",
+ "Session Created: ase_weather_demo_specialized_20260528-152124_5e2f\n",
"\n",
"User: What's the weather in Paris right now?\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[geocode_city] GET https://geocoding-api.open-meteo.com/v1/search?name=Paris&count=1&language=en&format=json\n",
@@ -599,10 +699,10 @@
" -> [geocode_city] HTTP 200, body: {\"results\":[{\"id\":2988507,\"name\":\"Paris\",\"latitude\":48.85341,\"longitude\":2.3488,\"elevation\":42.0,\"feature_code\":\"PPLC\",\"country_code\":\"FR\",\"admin1_id\":3012874,\"admin2_id\":2968815,\"admin3_id\":2988506,\"admin4_id\":6455259,\"timezone\":\"Europe/Paris\",\"population\":2138551,\"postcodes\":[\"75001\",\"75020\",\"7500...\n",
"[get_current_weather] GET https://api.open-meteo.com/v1/forecast?latitude=48.85341&longitude=2.3488&current=temperature_2m,weather_code,wind_speed_10m&temperature_unit=celsius&wind_speed_unit=kmh\n",
" headers: {'User-Agent': 'vectara-tutorial/1.0'}\n",
- " -> [get_current_weather] HTTP 200, body: {\"latitude\":48.86,\"longitude\":2.3399997,\"generationtime_ms\":0.1691579818725586,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":43.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"curre...\n",
+ " -> [get_current_weather] HTTP 200, body: {\"latitude\":48.86,\"longitude\":2.3399997,\"generationtime_ms\":0.08392333984375,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":43.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"current...\n",
"--------------------------\n",
"\n",
- "Agent: Currently, in Paris, it's 13.4°C with a moderate breeze blowing at 12.3 km/h. The weather is partly cloudy.\n"
+ "Agent: In Paris, it's currently 25.9°C with a gentle breeze at 5.9 km/h. The weather is clear and sunny.\n"
]
}
],
@@ -659,13 +759,20 @@
{
"cell_type": "code",
"execution_count": 11,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:31.527472Z",
+ "iopub.status.busy": "2026-05-28T22:21:31.526955Z",
+ "iopub.status.idle": "2026-05-28T22:21:31.531566Z",
+ "shell.execute_reply": "2026-05-28T22:21:31.530967Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Posting alerts to: https://ntfy.sh/vectara-demo-5cbed713\n",
+ "Posting alerts to: https://ntfy.sh/vectara-demo-c3c65484\n",
"(public topic — anyone with this URL can read its messages)\n"
]
}
@@ -682,14 +789,27 @@
{
"cell_type": "code",
"execution_count": 12,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:31.534543Z",
+ "iopub.status.busy": "2026-05-28T22:21:31.534227Z",
+ "iopub.status.idle": "2026-05-28T22:21:37.018303Z",
+ "shell.execute_reply": "2026-05-28T22:21:37.017885Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Weather Assistant' (agt_weather_assistant_7065)\n",
- "Created agent 'Weather Assistant' (key: agt_weather_assistant_b58d)\n"
+ "Deleted existing agent 'Weather Assistant' (agt_weather_assistant_1e95)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Weather Assistant' (key: agt_weather_assistant_53fc)\n"
]
}
],
@@ -776,16 +896,29 @@
{
"cell_type": "code",
"execution_count": 13,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:37.021534Z",
+ "iopub.status.busy": "2026-05-28T22:21:37.021226Z",
+ "iopub.status.idle": "2026-05-28T22:21:49.761059Z",
+ "shell.execute_reply": "2026-05-28T22:21:49.759305Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_weather_demo_alert_20260506-100633_4819\n",
+ "Session Created: ase_weather_demo_alert_20260528-152137_1165\n",
"\n",
"User: What's the weather in Anchorage right now? Send me an alert.\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[geocode_city] GET https://geocoding-api.open-meteo.com/v1/search?name=Anchorage&count=1&language=en&format=json\n",
@@ -793,14 +926,15 @@
" -> [geocode_city] HTTP 200, body: {\"results\":[{\"id\":5879400,\"name\":\"Anchorage\",\"latitude\":61.21806,\"longitude\":-149.90028,\"elevation\":31.0,\"feature_code\":\"PPLA2\",\"country_code\":\"US\",\"admin1_id\":5879092,\"admin2_id\":5879348,\"timezone\":\"America/Anchorage\",\"population\":289600,\"postcodes\":[\"99501\",\"99502\",\"99503\",\"99504\",\"99507\",\"99508\",\"99509\",\"99510\",\"99511\",\"99513\",\"99514\",\"99515\",\"99516\",\"99517\",\"99518\",\"99519\",\"99520\",\"99521\",\"995...\n",
"[get_current_weather] GET https://api.open-meteo.com/v1/forecast?latitude=61.21806&longitude=-149.90028&current=temperature_2m,weather_code,wind_speed_10m&temperature_unit=celsius&wind_speed_unit=kmh\n",
" headers: {'User-Agent': 'vectara-tutorial/1.0'}\n",
- " -> [get_current_weather] HTTP 200, body: {\"latitude\":61.265377,\"longitude\":-149.92735,\"generationtime_ms\":0.10097026824951172,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":34.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"current\":{\"time\":\"2026-05-06T17:00\",\"interval\":900,\"temperature_2m\":6.1,\"weather_code\":3,\"wind_spee...\n",
- "[send_weather_alert] POST https://ntfy.sh/vectara-demo-5cbed713\n",
- " body: It's currently 6.1°C in Anchorage with light winds at 0.8 km/h and partly cloudy skies.\n",
- " -> [send_weather_alert] HTTP 200, body: {\"id\":\"O0uAKf8pUSL4\",\"time\":1778087200,\"expires\":1778130400,\"event\":\"message\",\"topic\":\"vectara-demo-5cbed713\",\"message\":\"It's currently 6.1°C in Anchorage with light winds at 0.8 km/h and partly cloudy skies.\"}\n",
+ " -> [get_current_weather] HTTP 200, body: {\"latitude\":61.265377,\"longitude\":-149.92735,\"generationtime_ms\":0.16951560974121094,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":34.0,\"current_units\":{\"time\":\"iso8601\",\"interval\":\"seconds\",\"temperature_2m\":\"°C\",\"weather_code\":\"wmo code\",\"wind_speed_10m\":\"km/h\"},\"current\":{\"time\":\"2026-05-28T22:15\",\"interval\":900,\"temperature_2m\":11.3,\"weather_code\":2,\"wind_spe...\n",
+ "[send_weather_alert] POST https://ntfy.sh/vectara-demo-c3c65484\n",
+ " headers: {'Title': 'Mild Weather in Anchorage', 'Priority': '3', 'Tags': 'sun'}\n",
+ " body: The temperature in Anchorage is 11.3°C with a gentle breeze at 6 km/h, and the sky is clear with a few clouds.\n",
+ " -> [send_weather_alert] HTTP 200, body: {\"id\":\"ExijJoj0dEk0\",\"time\":1780006908,\"expires\":1780050108,\"event\":\"message\",\"topic\":\"vectara-demo-c3c65484\",\"title\":\"Mild Weather in Anchorage\",\"message\":\"The temperature in Anchorage is 11.3°C with a gentle breeze at 6 km/h, and the sky is clear with a few clouds.\",\"priority\":3,\"tags\":[\"sun\"]}\n",
"\n",
"--------------------------\n",
"\n",
- "Agent: The current temperature in Anchorage is 6.1°C, with light winds at 0.8 km/h, and the weather is partly cloudy. I've sent you a weather alert with this information.\n"
+ "Agent: The current temperature in Anchorage is 11.3°C with a gentle breeze of 6 km/h and the weather is clear with a few clouds. A weather alert has been sent to you with this information!\n"
]
}
],
@@ -839,17 +973,24 @@
{
"cell_type": "code",
"execution_count": 14,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:49.767892Z",
+ "iopub.status.busy": "2026-05-28T22:21:49.767413Z",
+ "iopub.status.idle": "2026-05-28T22:21:51.276095Z",
+ "shell.execute_reply": "2026-05-28T22:21:51.274127Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Messages on topic 'vectara-demo-5cbed713':\n",
+ "Messages on topic 'vectara-demo-c3c65484':\n",
"\n",
- " [priority ?] (no title)\n",
- " It's currently 6.1°C in Anchorage with light winds at 0.8 km/h and partly cloudy skies.\n",
- " tags: []\n",
+ " [priority 3] Mild Weather in Anchorage\n",
+ " The temperature in Anchorage is 11.3°C with a gentle breeze at 6 km/h, and the sky is clear with a few clouds.\n",
+ " tags: ['sun']\n",
"\n"
]
}
@@ -894,13 +1035,20 @@
{
"cell_type": "code",
"execution_count": 15,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:51.282317Z",
+ "iopub.status.busy": "2026-05-28T22:21:51.281871Z",
+ "iopub.status.idle": "2026-05-28T22:21:55.322037Z",
+ "shell.execute_reply": "2026-05-28T22:21:55.321367Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Weather Assistant (broken endpoint)' (key: agt_weather_assistant_broken_endpoint_a5fd)\n"
+ "Created agent 'Weather Assistant (broken endpoint)' (key: agt_weather_assistant_broken_endpoint_b2f6)\n"
]
}
],
@@ -958,24 +1106,37 @@
{
"cell_type": "code",
"execution_count": 16,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:21:55.325599Z",
+ "iopub.status.busy": "2026-05-28T22:21:55.325260Z",
+ "iopub.status.idle": "2026-05-28T22:22:01.662871Z",
+ "shell.execute_reply": "2026-05-28T22:22:01.661342Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_weather_demo_broken_20260506-100645_ed1a\n",
+ "Session Created: ase_weather_demo_broken_20260528-152155_b667\n",
"\n",
"User: What's the weather in Madrid right now?\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[geocode_city] GET https://geocoding-api.open-meteo.com/v1/search-cities?name=Madrid&count=1&language=en&format=json\n",
" headers: {'User-Agent': 'vectara-tutorial/1.0'}\n",
- " -> [geocode_city] HTTP 404, body: {\"error\":true,\"reason\":\"Not Found\"}\n",
+ " -> [geocode_city] HTTP 404, body: {\"reason\":\"Not Found\",\"error\":true}\n",
"--------------------------\n",
"\n",
- "Agent: I encountered an issue while trying to get the geographical coordinates for Madrid. Please try again later!\n"
+ "Agent: I wasn't able to retrieve the location data for Madrid at the moment. Please try again later.\n"
]
}
],
@@ -1023,7 +1184,14 @@
{
"cell_type": "code",
"execution_count": 17,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:01.668499Z",
+ "iopub.status.busy": "2026-05-28T22:22:01.667748Z",
+ "iopub.status.idle": "2026-05-28T22:22:01.676283Z",
+ "shell.execute_reply": "2026-05-28T22:22:01.675679Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -1033,7 +1201,7 @@
"\n",
" [geocode_city] HTTP 404\n",
" url: https://geocoding-api.open-meteo.com/v1/search-cities?name=Madrid&count=1&language=en&format=json\n",
- " body: {\"error\":true,\"reason\":\"Not Found\"}\n",
+ " body: {\"reason\":\"Not Found\",\"error\":true}\n",
"\n"
]
}
@@ -1112,14 +1280,27 @@
{
"cell_type": "code",
"execution_count": 18,
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:01.678642Z",
+ "iopub.status.busy": "2026-05-28T22:22:01.678413Z",
+ "iopub.status.idle": "2026-05-28T22:22:05.540209Z",
+ "shell.execute_reply": "2026-05-28T22:22:05.539295Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted alert agent: agt_weather_assistant_b58d\n",
- "Deleted broken-endpoint agent: agt_weather_assistant_broken_endpoint_a5fd\n"
+ "Deleted alert agent: agt_weather_assistant_53fc\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleted broken-endpoint agent: agt_weather_assistant_broken_endpoint_b2f6\n"
]
}
],
diff --git a/notebooks/api-examples/12-agent-skills.ipynb b/notebooks/api-examples/13-agent-skills.ipynb
similarity index 82%
rename from notebooks/api-examples/12-agent-skills.ipynb
rename to notebooks/api-examples/13-agent-skills.ipynb
index bf998cd..59e8d89 100644
--- a/notebooks/api-examples/12-agent-skills.ipynb
+++ b/notebooks/api-examples/13-agent-skills.ipynb
@@ -5,7 +5,7 @@
"id": "cell-0",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -47,7 +47,7 @@
"| **Skill** | Specialist *instructions* loaded only when needed | You have multiple distinct mindsets/runbooks/voices the agent might need; stuffing them all into the system prompt would balloon every turn's token cost. |\n",
"| **Tool** (e.g. `web_get`, `lambda`, `corpora_search`) | A *capability* — the agent can take an action | The agent needs to **do** something (fetch data, search a corpus, call an API), not just adopt a different mindset. |\n",
"| **System-prompt baseline** | Always-on instructions | The guidance applies to every turn and is short enough that loading-on-demand isn't worth the indirection. |\n",
- "| **Sub-agent** | A wholly separate agent invoked as a tool | The work needs its own model, its own tool set, or its own multi-step flow. Heavier than a skill — see notebook 5. |\n",
+ "| **Sub-agent** | A wholly separate agent invoked as a tool | The work needs its own model, its own tool set, or its own multi-step flow. Heavier than a skill — see notebook 6. |\n",
"\n",
"The clearest tell that you want a *skill*, not a tool or a longer prompt: you're about to write *\"… and ALSO, when the user reports an outage, do these 30 things …\"* in your system prompt. Pull each \"and ALSO\" branch out into a skill instead.\n",
"\n",
@@ -83,7 +83,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "cell-5",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:08.002191Z",
+ "iopub.status.busy": "2026-05-28T22:22:08.001997Z",
+ "iopub.status.idle": "2026-05-28T22:22:08.073287Z",
+ "shell.execute_reply": "2026-05-28T22:22:08.073023Z"
+ }
+ },
"outputs": [],
"source": [
"import os\n",
@@ -106,7 +113,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "cell-6",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:08.074677Z",
+ "iopub.status.busy": "2026-05-28T22:22:08.074588Z",
+ "iopub.status.idle": "2026-05-28T22:22:08.076877Z",
+ "shell.execute_reply": "2026-05-28T22:22:08.076669Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent).\n",
@@ -142,13 +156,20 @@
"cell_type": "code",
"execution_count": 3,
"id": "cell-8",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:08.078252Z",
+ "iopub.status.busy": "2026-05-28T22:22:08.078124Z",
+ "iopub.status.idle": "2026-05-28T22:22:11.373255Z",
+ "shell.execute_reply": "2026-05-28T22:22:11.372735Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Support Copilot' (key: agt_support_copilot_efae)\n",
+ "Created agent 'Support Copilot' (key: agt_support_copilot_af36)\n",
"\n",
"Skill description (always in system prompt): 194 chars\n",
"Skill content (loaded only on invoke): 2282 chars\n",
@@ -270,7 +291,14 @@
"cell_type": "code",
"execution_count": 4,
"id": "cell-11",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:11.375759Z",
+ "iopub.status.busy": "2026-05-28T22:22:11.375554Z",
+ "iopub.status.idle": "2026-05-28T22:22:11.398401Z",
+ "shell.execute_reply": "2026-05-28T22:22:11.396779Z"
+ }
+ },
"outputs": [],
"source": [
"def ask_with_skills(agent_key, session_key, messages, show_events=True, content_preview_chars=160):\n",
@@ -323,13 +351,20 @@
"cell_type": "code",
"execution_count": 5,
"id": "cell-12",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:11.406292Z",
+ "iopub.status.busy": "2026-05-28T22:22:11.405799Z",
+ "iopub.status.idle": "2026-05-28T22:22:19.513809Z",
+ "shell.execute_reply": "2026-05-28T22:22:19.512773Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_skills_demo_20260506-100816_5b26\n",
+ "Session Created: ase_skills_demo_20260528-152211_7d3e\n",
"\n",
"Inbound message:\n",
"From: ops@acme-financial.example\n",
@@ -340,7 +375,13 @@
"in the middle of pre-market prep so this is blocking real work. We tried\n",
"two regions and it's the same. Can you tell us if you're aware of an\n",
"outage and give us an ETA? — Lina\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[input_message] kinds=['text']\n",
@@ -350,25 +391,28 @@
"\n",
"Copilot reply:\n",
"## 1. Severity classification\n",
- "**SEV-1** — production is down for many customers. Anchoring phrases: \"our entire analyst team (about 40 people) is getting 'Service unavailable'\" and \"blocking real work.\"\n",
+ "- **SEV-1** — production is down for many customers. \n",
+ "- Anchored by: \"none of our analysts can log in\" and \"this is blocking real work.\"\n",
"\n",
"## 2. Required information to collect\n",
- "- Account / tenant / org ID\n",
- "- The exact time the issue started in their timezone\n",
- "- Specific region or environment affected\n",
- "- Confirmation that all users are affected and any reproduction steps\n",
- "- Any error messages, screenshots, or request IDs they can share\n",
+ "- Account / tenant / org id\n",
+ "- Time the issue started in ET\n",
+ "- The specific regions attempted\n",
+ "- Confirmation whether the \"Service unavailable\" error is the only error message\n",
+ "- Any screenshots or request IDs they can provide\n",
"\n",
"## 3. Routing\n",
- "- SEV-1 → Page on-call SRE immediately + notify the account's CSM + post in #incidents.\n",
+ "- Page `` immediately.\n",
+ "- Notify the account's CSM.\n",
+ "- Post in #incidents.\n",
"\n",
"## 4. Customer-facing reply (draft)\n",
"Hi Lina,\n",
"\n",
- "We acknowledge the critical impact that this \"Service unavailable\" issue is having on your team during pre-market prep. Our on-call team is being paged right now to investigate this as a SEV-1 priority. We will update you on the status within 30 minutes. Thank you for your patience as we work to resolve this.\n",
+ "We're aware of the service availability issue impacting your analysts and understand this is blocking pre-market preparations. Our on-call team is investigating this with urgency, and we've escalated the matter internally. I will update you with more information within the next 30 minutes. Thank you for your patience as we work to resolve this.\n",
"\n",
"## 5. Internal note\n",
- "This is a SEV-1 incident affecting all 40 users at Acme Financial; their analysts cannot log into dashboards, blocking pre-market preparations. Information still needed includes the account ID, time the issue started, specific regions, and any error information. There is a potential churn risk if this impacts financial operations significantly.\n"
+ "SEV-1 incident: Acme Financial's analyst team cannot access dashboards due to a \"Service unavailable\" error, affecting approximately 40 users across multiple regions. We need their org id, the exact time the issue began in ET, and details on any other error messages to aid the investigation. There's potential churn risk, given the pre-market operational impact. Please ensure the on-call SRE is paged immediately.\n"
]
}
],
@@ -441,25 +485,45 @@
"cell_type": "code",
"execution_count": 6,
"id": "cell-15",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:19.518547Z",
+ "iopub.status.busy": "2026-05-28T22:22:19.518203Z",
+ "iopub.status.idle": "2026-05-28T22:22:30.869122Z",
+ "shell.execute_reply": "2026-05-28T22:22:30.868377Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_skills_client-trigger_20260506-100819_9673\n",
+ "Session Created: ase_skills_client-trigger_20260528-152219_659c\n",
"\n",
"Step A: client preloads the escalation skill (e.g. monitoring webhook fired SEV-1).\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[input_message] kinds=['skill', 'text']\n",
+ "[skill_load] customer_escalation (2282 chars): When the inbound message looks like an escalation, follow this runbook exactly. Produce a single Markdown response with these sections, in order: ## 1. Severit...\n",
"[tool_input invoke_skill] args={'skill_name': 'customer_escalation'}\n",
"[skill_load] customer_escalation (2282 chars): When the inbound message looks like an escalation, follow this runbook exactly. Produce a single Markdown response with these sections, in order: ## 1. Severit...\n",
"--------------------------\n",
"\n",
"Step B: forward the message; runbook is already in context.\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[input_message] kinds=['text']\n",
@@ -467,21 +531,21 @@
"\n",
"Copilot reply:\n",
"## 1. Severity classification\n",
- "**SEV-1** — Production is down for many customers. Phrases anchoring this decision: \"none of our analysts can log in\" and \"blocking real work\".\n",
+ "**SEV-1** — production is down for many customers as none of the analysts can log in, preventing them from performing critical work. The anchoring phrases include: \"entire analyst team... is getting 'Service unavailable'\" and \"blocking real work during pre-market prep.\"\n",
"\n",
"## 2. Required information to collect\n",
- "- Account / tenant / org id\n",
- "- Reproduction steps or confirmation that it affects all users\n",
- "- Any error messages, request IDs, or additional screenshots \n",
+ "- Account / tenant / org id (likely Acme Financial)\n",
+ "- Any error messages, screenshots, or request IDs they can share\n",
+ "- Confirmation of both regions they have tested\n",
"\n",
"## 3. Routing\n",
- "Page the on-call SRE immediately, notify the account's CSM, and post in #incidents.\n",
+ "Page on-call SRE immediately, notify the account's CSM, and post in #incidents. Use , , and #incidents for communication.\n",
"\n",
"## 4. Customer-facing reply (draft)\n",
- "Hi Lina, I'm sorry to hear about the access issues your team is experiencing with the dashboards. We're escalating this as a top priority and have alerted our on-call team to investigate immediately. We'll update you on our findings within the next 30 minutes. Thank you for your patience.\n",
+ "Thank you for alerting us to this critical issue—I'm sorry your team is experiencing this disruption right now. We have immediately escalated this to our technical team who are urgently investigating. I'll keep you updated at least every hour until this is resolved. If you have any additional details, like error messages, please share them with me.\n",
"\n",
"## 5. Internal note\n",
- "SEV-1 escalation as all analysts at Acme Financial are unable to log in, impeding critical pre-market operations. Missing information includes account ID and any error specifics they can provide. This is likely a significant churn risk if not resolved quickly.\n"
+ "SEV-1 incident affecting Acme Financial with a full analyst team (about 40) unable to log in due to a \"Service unavailable\" error, confirmed in multiple regions. Missing information includes any specific error messages and exact regions tested. High urgency due to business criticality during market prep.\n"
]
}
],
@@ -548,14 +612,27 @@
"cell_type": "code",
"execution_count": 7,
"id": "cell-18",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:30.872931Z",
+ "iopub.status.busy": "2026-05-28T22:22:30.872495Z",
+ "iopub.status.idle": "2026-05-28T22:22:45.289253Z",
+ "shell.execute_reply": "2026-05-28T22:22:45.287821Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Support Copilot' (agt_support_copilot_efae)\n",
- "Created agent 'Support Copilot' (key: agt_support_copilot_707e)\n",
+ "Deleted existing agent 'Support Copilot' (agt_support_copilot_af36)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Support Copilot' (key: agt_support_copilot_2014)\n",
"\n",
"Inbound message:\n",
"From: pm@northwind.example\n",
@@ -565,7 +642,13 @@
"out SSO before we expand seats. Specifically, can we wire up Okta SAML?\n",
"We're not in a huge rush — would just like to know if it's on the roadmap\n",
"or if we should wait. Happy to be a design partner. — Sam\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[input_message] kinds=['text']\n",
@@ -575,25 +658,31 @@
"\n",
"Copilot reply:\n",
"## 1. One-line summary\n",
- "Request to integrate Okta SAML for Single Sign-On (SSO) as part of expanding their security setup.\n",
+ "The customer is requesting SSO integration with Okta using SAML.\n",
"\n",
"## 2. Required intake fields\n",
- "- **Persona**: Security team member or Product Manager\n",
- "- **Problem**: Need to roll out SSO to meet security policies before expanding seats.\n",
+ "- **Persona**: security team / project manager\n",
+ "- **Problem**: need to roll out SSO before expanding seats\n",
"- **Current workaround**: \n",
"- **Frequency / volume**: \n",
- "- **Desired outcome**: Successfully integrate Okta SAML for SSO.\n",
- "- **Time sensitivity**: Nice-to-have, no immediate rush.\n",
+ "- **Desired outcome**: successful integration of Okta SAML for SSO\n",
+ "- **Time sensitivity**: not in a huge rush, but interested in roadmap\n",
"\n",
"## 3. Categorization\n",
"`integration`, `enterprise-controls`\n",
"\n",
"## 4. Suggested filing\n",
"- Where this should go: `partner-integrations queue`\n",
- "- Whether it warrants a discovery call or can be filed asynchronously: Discovery call might be beneficial given the offer to be a design partner.\n",
+ "- Whether it warrants a discovery call: can be filed asynchronously\n",
"\n",
"## 5. Customer-facing reply (draft)\n",
- "Thank you for reaching out, Sam. We appreciate your interest in integrating Okta SAML for SSO. I've shared your request with our product team and will update you shortly on whether it's on our roadmap.\n",
+ "Hi Sam! Thank you for your interest in expanding your use of our product. We understand you're looking to integrate Okta SAML for SSO. I'll pass your request to our product team to review the feasibility and roadmap, and we'll get back to you soon with more details.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"Inbound message:\n",
"From: head-of-data@globex.example\n",
@@ -603,7 +692,13 @@
"morning. The titles are still there but the underlying data is gone. We\n",
"have a board meeting at 4pm and a few of those dashboards are what they\n",
"want to see. Please look ASAP. — Priya\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"[input_message] kinds=['text']\n",
@@ -613,29 +708,25 @@
"\n",
"Copilot reply:\n",
"## 1. Severity classification\n",
- "**SEV-1** — The report indicates that \"most of our team's saved dashboards are showing as empty... the underlying data is gone\", implying a significant data loss for a key feature, crucial especially with an upcoming board meeting.\n",
+ "**SEV-1** — The customer's message indicates that \"most of our team's saved dashboards are showing as empty\" and they need them for a meeting, suggesting that data loss may be confirmed.\n",
"\n",
"## 2. Required information to collect\n",
- "- Account / tenant / org ID\n",
- "- Time the issue started (their timezone)\n",
- "- Region or environment if known\n",
- "- Confirmation if all users are affected or specific ones\n",
- "- Any error messages, screenshots, or request IDs they can share\n",
+ "- Account / tenant / org id\n",
+ "- Time the issue started (including timezone)\n",
+ "- Region or environment\n",
+ "- Confirmation of whether this affects all users or specific ones\n",
+ "- Any error messages, screenshots, or request IDs available\n",
"\n",
"## 3. Routing\n",
- "Page the on-call SRE immediately, notify the account's Customer Success Manager (CSM), and post in #incidents.\n",
+ "Page the on-call SRE immediately, notify the account's CSM, and post in #incidents.\n",
"\n",
"## 4. Customer-facing reply (draft)\n",
- "Hi Priya, \n",
- "\n",
- "I'm sorry to hear that your team's saved dashboards are currently showing as empty. We're treating this with high urgency and have paged our on-call engineers to investigate immediately. I'll keep you updated on our progress and aim to reach out within the next hour. Thank you for your patience as we work to resolve this swiftly.\n",
- "\n",
- "Best regards,\n",
+ "Hi Priya,\n",
"\n",
- "[Your Name]\n",
+ "I'm sorry to hear that your team's dashboards are showing as empty. We are currently investigating the issue with high priority and have notified our on-call team to address it. I'll update you within the next hour with more information. Thank you for your patience as we work to resolve this.\n",
"\n",
"## 5. Internal note\n",
- "SEV-1 incident due to potential data loss impacting multiple users at Globex, especially urgent with a board meeting imminent. Missing information includes account ID, the exact time the issue began, and any error messages/screenshots to aid investigation. Immediate action by on-call SRE and communication with CSM required. Potential risk of customer dissatisfaction and churn given the timing and impact.\n"
+ "SEV-1 escalation due to potential data loss affecting Globex where dashboards are showing empty. Need to gather their account ID, exact start time, and the scope of impact. Ensure the on-call SRE and CSM are aware and monitoring the situation closely. High priority given their upcoming board meeting and potential for churn if unresolved quickly.\n"
]
}
],
@@ -735,7 +826,7 @@
"source": [
"## Step 5: Restricting Skills per Step with `allowed_skills`\n",
"\n",
- "For multi-step agents (notebook 9), each step independently controls which skills are exposed via `allowed_skills`:\n",
+ "For multi-step agents (notebook 10), each step independently controls which skills are exposed via `allowed_skills`:\n",
"\n",
"- **omitted (`null`)** — default; all skills the agent declares are available\n",
"- **empty array (`[]`)** — no skills available in this step; the `invoke_skill` tool isn't exposed at all\n",
@@ -743,14 +834,21 @@
"\n",
"This pairs cleanly with the multi-step pattern: a *router* step might forbid skills entirely (force routing-only behavior), then transition into a *specialist* step that exposes exactly the right one. For example: a router step decides whether the message is an incident vs. a feature ask, then transitions to one of two follow-up steps that each expose only the matching skill.\n",
"\n",
- "Below is the *shape* of step configs with `allowed_skills`. We don't build a full multi-step agent here — see notebook 9 for that orchestration pattern."
+ "Below is the *shape* of step configs with `allowed_skills`. We don't build a full multi-step agent here — see notebook 10 for that orchestration pattern."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "cell-21",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:45.294722Z",
+ "iopub.status.busy": "2026-05-28T22:22:45.294100Z",
+ "iopub.status.idle": "2026-05-28T22:22:45.300257Z",
+ "shell.execute_reply": "2026-05-28T22:22:45.299382Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -829,7 +927,7 @@
"\n",
"- **Skill** — your agent should sometimes adopt a different mindset/checklist/voice that's too long to keep in the system prompt every turn. The set of mindsets is small, fixed, and you can describe each one in a sentence or two. *(Today's notebook.)*\n",
"- **Tool** — the agent needs to *do* something (search a corpus with `corpora_search`, call an API with `web_get`, run code with `lambda`). Skills load *instructions*; they don't take actions.\n",
- "- **Sub-agent** — the work needs its own model, its own tool set, or its own multi-step flow. Heavier than a skill — see notebook 5.\n",
+ "- **Sub-agent** — the work needs its own model, its own tool set, or its own multi-step flow. Heavier than a skill — see notebook 6.\n",
"- **Just stuff the prompt** — the guidance applies to *every* turn and is short enough that lazy loading is more confusing than it is cheap.\n",
"\n",
"A common pairing in real Vectara deployments: a support copilot uses `corpora_search` (over your help-center / runbook corpus) to *retrieve* facts, AND skills to switch into the right *workflow* for each message. The corpus answers \"what's true,\" the skill answers \"what should I do about it.\"\n",
@@ -856,13 +954,20 @@
"cell_type": "code",
"execution_count": 9,
"id": "cell-24",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:45.304244Z",
+ "iopub.status.busy": "2026-05-28T22:22:45.303981Z",
+ "iopub.status.idle": "2026-05-28T22:22:47.272538Z",
+ "shell.execute_reply": "2026-05-28T22:22:47.271382Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted Support Copilot agent: agt_support_copilot_707e\n"
+ "Deleted Support Copilot agent: agt_support_copilot_2014\n"
]
}
],
diff --git a/notebooks/api-examples/13-agent-steps.ipynb b/notebooks/api-examples/14-agent-steps.ipynb
similarity index 85%
rename from notebooks/api-examples/13-agent-steps.ipynb
rename to notebooks/api-examples/14-agent-steps.ipynb
index cf42ffe..261cf73 100644
--- a/notebooks/api-examples/13-agent-steps.ipynb
+++ b/notebooks/api-examples/14-agent-steps.ipynb
@@ -5,7 +5,7 @@
"id": "cell-0",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -26,7 +26,7 @@
"4. Use **`reentry_step`** so follow-up user turns enter a Q&A phase instead of restarting the pipeline\n",
"5. Decide between steps, sub-agents, skills, and a single comprehensive prompt\n",
"\n",
- "> **How this differs from notebook 9.** Notebook 9 covers the **classifier-router fan-out** pattern: one classifier branches to one of N terminal handlers. This notebook covers **sequential pipelines** (each phase chains into the next), **conditional gating** (skip a path based on prior output), and **`reentry_step`** (multi-turn flows). The two notebooks are complementary — read both for the full step-orchestration picture."
+ "> **How this differs from notebook 10.** Notebook 10 covers the **classifier-router fan-out** pattern: one classifier branches to one of N terminal handlers. This notebook covers **sequential pipelines** (each phase chains into the next), **conditional gating** (skip a path based on prior output), and **`reentry_step`** (multi-turn flows). The two notebooks are complementary — read both for the full step-orchestration picture."
]
},
{
@@ -87,7 +87,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "cell-5",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:49.543090Z",
+ "iopub.status.busy": "2026-05-28T22:22:49.542800Z",
+ "iopub.status.idle": "2026-05-28T22:22:49.600821Z",
+ "shell.execute_reply": "2026-05-28T22:22:49.600559Z"
+ }
+ },
"outputs": [],
"source": [
"import os\n",
@@ -110,7 +117,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "cell-6",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:49.602145Z",
+ "iopub.status.busy": "2026-05-28T22:22:49.602044Z",
+ "iopub.status.idle": "2026-05-28T22:22:49.604634Z",
+ "shell.execute_reply": "2026-05-28T22:22:49.604368Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent).\n",
@@ -158,13 +172,20 @@
"cell_type": "code",
"execution_count": 3,
"id": "cell-8",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:49.605984Z",
+ "iopub.status.busy": "2026-05-28T22:22:49.605904Z",
+ "iopub.status.idle": "2026-05-28T22:22:53.567396Z",
+ "shell.execute_reply": "2026-05-28T22:22:53.566270Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Contract Triage' (key: agt_contract_triage_2466)\n"
+ "Created agent 'Contract Triage' (key: agt_contract_triage_7a6f)\n"
]
}
],
@@ -319,7 +340,14 @@
"cell_type": "code",
"execution_count": 4,
"id": "cell-10",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:53.573087Z",
+ "iopub.status.busy": "2026-05-28T22:22:53.572414Z",
+ "iopub.status.idle": "2026-05-28T22:22:53.581931Z",
+ "shell.execute_reply": "2026-05-28T22:22:53.581317Z"
+ }
+ },
"outputs": [],
"source": [
"def run_pipeline(agent_key, session_key, content, show_events=True, json_preview_chars=200):\n",
@@ -371,28 +399,41 @@
"cell_type": "code",
"execution_count": 5,
"id": "cell-11",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:22:53.584583Z",
+ "iopub.status.busy": "2026-05-28T22:22:53.584348Z",
+ "iopub.status.idle": "2026-05-28T22:23:01.549018Z",
+ "shell.execute_reply": "2026-05-28T22:23:01.547697Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_contract_triage_nda_20260506-100821_a76d\n",
+ "Session Created: ase_contract_triage_nda_20260528-152253_a434\n",
"\n",
"Inbound document:\n",
"MUTUAL NON-DISCLOSURE AGREEMENT\n",
"\n",
"This Mutual Non-Disclosure Agreement (\"Agreement\") is entered into as of March 12, 2025\n",
"(\"Effective Date\") between Acme Robotics, Inc., a Delaware corporation (\"Acme\")...\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Pipeline Events ------\n",
"[structured_output]\n",
"{\n",
" \"doc_type\": \"nda\",\n",
- " \"confidence\": 0.95,\n",
- " \"reasoning\": \"The document is explicitly labeled as a Mutual Non-Disclosure Agreement and contains terms related to confidentiality and non-disclosure.\"\n",
- "...\n",
+ " \"confidence\": 1.0,\n",
+ " \"reasoning\": \"The document is titled 'Mutual Non-Disclosure Agreement' and includes terms specific to confidentiality obligations.\"\n",
+ "}\n",
"[step_transition] classify -> extract\n",
"[structured_output]\n",
"{\n",
@@ -401,15 +442,22 @@
" \"Northwind Analytics, Inc.\"\n",
" ],\n",
" \"effective_date\": \"March 12, 2025\",\n",
- " \"term\": \"two (2) years from the Effective Date\",\n",
- " \"governing_law\": \"\"...\n",
+ " \"term\": \"2 years\",\n",
+ " \"governing_law\": \"\",\n",
+ " \"termination_clause\": \"Eit...\n",
"[step_transition] extract -> flag_issues\n",
"[agent_output]\n",
- "One sentence: negotiate-first\n",
+ "# NDA Risk Review Report\n",
+ "\n",
+ "**One sentence**: Ship-as-is.\n",
"\n",
- "- Governing Law: The governing law clause is missing, which specifies the jurisdiction's laws that will govern the agreement and is crucial for dispute resolution. Remedy by adding a governing law clause to establish jurisdiction.\n",
+ "**Flags**:\n",
+ "- **Governing Law **: It is important to specify the governing law to determine which jurisdiction's laws will apply in case of a dispute. Remediate by determining the agreed governing law and including it in the agreement.\n",
"\n",
- "Fields to chase: Governing Law\n",
+ "**Fields that came back ''**:\n",
+ "- Governing Law\n",
+ "\n",
+ "No other material flags.\n",
"----------------------------\n"
]
}
@@ -492,32 +540,57 @@
"cell_type": "code",
"execution_count": 6,
"id": "cell-14",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:23:01.554475Z",
+ "iopub.status.busy": "2026-05-28T22:23:01.553928Z",
+ "iopub.status.idle": "2026-05-28T22:23:13.566967Z",
+ "shell.execute_reply": "2026-05-28T22:23:13.565292Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Contract Triage' (agt_contract_triage_2466)\n",
- "Created agent 'Contract Triage' (key: agt_contract_triage_a5e4)\n",
- "Session Created: ase_contract_triage_gate_20260506-100832_a9c8\n",
+ "Deleted existing agent 'Contract Triage' (agt_contract_triage_7a6f)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Contract Triage' (key: agt_contract_triage_f6fd)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Session Created: ase_contract_triage_gate_20260528-152306_5bc7\n",
"\n",
"Inbound document (not actually a contract):\n",
"Hey team — quick offsite recap. We covered Q3 goals on day 1 and the new\n",
"review process on day 2. Action items: (1) Priya to circulate the calibration rubric by Friday,\n",
"(2) Sam to set up the cross-functional channel in Slack. Lunch was very good. — Lina\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Pipeline Events ------\n",
"[structured_output]\n",
"{\n",
" \"doc_type\": \"other\",\n",
" \"confidence\": 0.95,\n",
- " \"reasoning\": \"The text is an informal recap of a meeting and not a contract.\"\n",
+ " \"reasoning\": \"The text is an informal recap of a meeting or event, not a contract.\"\n",
"}\n",
"[step_transition] classify -> exit_other\n",
"[agent_output]\n",
- "It appears that the text you provided is a summary or recap of an offsite meeting, rather than a contract. If you have an actual contract document that you need assistance with, please forward that specific document.\n",
+ "The document you provided appears to be a recap of an offsite meeting, which includes notes and action items rather than a contract. If you have a specific contractual document you need assistance with, please forward that, and I'll be happy to help.\n",
"----------------------------\n"
]
}
@@ -607,25 +680,44 @@
"cell_type": "code",
"execution_count": 7,
"id": "cell-17",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:23:13.571615Z",
+ "iopub.status.busy": "2026-05-28T22:23:13.571210Z",
+ "iopub.status.idle": "2026-05-28T22:23:27.093840Z",
+ "shell.execute_reply": "2026-05-28T22:23:27.092495Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Contract Triage' (agt_contract_triage_a5e4)\n",
- "Created agent 'Contract Triage' (key: agt_contract_triage_d721)\n",
- "Session Created: ase_contract_triage_q_a_20260506-100841_9bf0\n",
+ "Deleted existing agent 'Contract Triage' (agt_contract_triage_f6fd)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Contract Triage' (key: agt_contract_triage_9594)\n",
+ "Session Created: ase_contract_triage_q_a_20260528-152318_f67b\n",
"\n",
"Turn 1: analyze the NDA (full pipeline)\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Pipeline Events ------\n",
"[structured_output]\n",
"{\n",
" \"doc_type\": \"nda\",\n",
" \"confidence\": 1.0,\n",
- " \"reasoning\": \"The document is titled 'Mutual Non-Disclosure Agreement' and outlines terms related to the confidentiality of shared information.\"\n",
+ " \"reasoning\": \"The document is explicitly titled 'Mutual Non-Disclosure Agreement' and details confidentiality obligations.\"\n",
"}\n",
"[step_transition] classify -> extract\n",
"[structured_output]\n",
@@ -635,40 +727,54 @@
" \"Northwind Analytics, Inc.\"\n",
" ],\n",
" \"effective_date\": \"March 12, 2025\",\n",
- " \"term\": \"2 years\",\n",
- " \"governing_law\": \"\",\n",
- " \"termination_clause\": \"Eit...\n",
+ " \"term\": \"two (2) years from the Effective Date\",\n",
+ " \"governing_law\": \"\"...\n",
"[step_transition] extract -> flag_issues\n",
"[agent_output]\n",
+ "# Contract Risk Review Report\n",
+ "\n",
"**Recommendation: Negotiate-first**\n",
"\n",
- "- **Survival Period of Confidentiality Obligations**: The agreement stipulates that confidentiality obligations survive for three years after termination. This is critical for ensuring long-term protection of confidential information. Consider negotiating a longer survival period for added protection.\n",
- "- **Termination Clause**: The ability for either party to terminate with 30 days' notice could potentially lead to abrupt cancellations that may not align with business interests. Consider renegotiating for a more extended notice period to provide more operati...\n",
+ "- **Governing Law:**\n",
+ " - **What it is:** The governing law clause specifies which jurisdiction's laws will be used to interpret the Agreement.\n",
+ " - **Why it matters:** It influences the legal proceedings and is crucial in case of a dispute, affecting the cost and convenience of litigation.\n",
+ " - **How to remediate:** Specify a governing law clause that is mutually agreeable to both parties.\n",
+ "\n",
+ "**Unknown Fields:**\n",
+ "\n",
+ "- **Governing Law:** The governing law clause was not specified and should be clarified with the other party.\n",
"----------------------------\n",
"\n",
"Turn 2: follow-up question (should land in qa, not classify)\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Pipeline Events ------\n",
"[agent_output]\n",
- "The term of this NDA is two years from the Effective Date, which is March 12, 2025. Yes, there is a survival period after termination; the confidentiality obligations survive for an additional three years after termination.\n",
+ "The term of the NDA is two (2) years from the Effective Date. Yes, there is a survival period after termination; confidentiality obligations survive for an additional 3 years after termination.\n",
"----------------------------\n"
]
},
{
"data": {
"text/plain": [
- "[{'id': 'aev_5a2c9fa4-c9c5-4ddf-8a89-b4a8783f3e6c',\n",
- " 'session_key': 'ase_contract_triage_q_a_20260506-100841_9bf0',\n",
- " 'created_at': '2026-05-06T17:08:50.178Z',\n",
+ "[{'id': 'aev_ba385985-372a-4eb1-a534-ee8c3993cc14',\n",
+ " 'session_key': 'ase_contract_triage_q_a_20260528-152318_f67b',\n",
+ " 'created_at': '2026-05-28T22:23:26.445Z',\n",
" 'type': 'input_message',\n",
" 'messages': [{'type': 'text',\n",
- " 'content': \"What's the term of this NDA, and is there a survival period after termination?\"}]},\n",
- " {'id': 'aev_57609702-e24d-4282-8580-9b45e22166bd',\n",
- " 'session_key': 'ase_contract_triage_q_a_20260506-100841_9bf0',\n",
- " 'created_at': '2026-05-06T17:08:51.444Z',\n",
+ " 'content': \"What's the term of this NDA, and is there a survival period after termination?\"}],\n",
+ " 'message_diffs': ['']},\n",
+ " {'id': 'aev_70e7de9e-2b0a-4fd1-a4a5-f928c698752e',\n",
+ " 'session_key': 'ase_contract_triage_q_a_20260528-152318_f67b',\n",
+ " 'created_at': '2026-05-28T22:23:27.094Z',\n",
" 'type': 'agent_output',\n",
- " 'content': 'The term of this NDA is two years from the Effective Date, which is March 12, 2025. Yes, there is a survival period after termination; the confidentiality obligations survive for an additional three years after termination.'}]"
+ " 'content': 'The term of the NDA is two (2) years from the Effective Date. Yes, there is a survival period after termination; confidentiality obligations survive for an additional 3 years after termination.'}]"
]
},
"execution_count": 7,
@@ -748,8 +854,8 @@
"## Step 5: When to Reach for Steps (Decision Aid)\n",
"\n",
"- **Steps** — a workflow has genuinely distinct phases (classify, extract, flag) where each phase benefits from its own focused prompt and tool set, *and* you want the system (not the LLM) deciding which phase runs when. Today's notebook.\n",
- "- **Sub-agents** — the work needs its own *isolated* context (start fresh), its own model, or a heavyweight tool surface. Sub-agents do **not** share session history with the parent. See notebook 5.\n",
- "- **Skills** (notebook 12) — the agent should sometimes adopt a different *mindset* for the same job. Skills load instructions on demand within a single conversation; they don't change which phase runs.\n",
+ "- **Sub-agents** — the work needs its own *isolated* context (start fresh), its own model, or a heavyweight tool surface. Sub-agents do **not** share session history with the parent. See notebook 6.\n",
+ "- **Skills** (notebook 13) — the agent should sometimes adopt a different *mindset* for the same job. Skills load instructions on demand within a single conversation; they don't change which phase runs.\n",
"- **One big prompt** — the workflow really is one phase. If you're not branching on intermediate outputs and not changing tool access, you don't need steps.\n",
"\n",
"### Anti-patterns\n",
@@ -774,13 +880,20 @@
"cell_type": "code",
"execution_count": 8,
"id": "cell-21",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:23:27.097885Z",
+ "iopub.status.busy": "2026-05-28T22:23:27.097355Z",
+ "iopub.status.idle": "2026-05-28T22:23:28.974279Z",
+ "shell.execute_reply": "2026-05-28T22:23:28.973002Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted Contract Triage agent: agt_contract_triage_d721\n"
+ "Deleted Contract Triage agent: agt_contract_triage_9594\n"
]
}
],
diff --git a/notebooks/api-examples/2-data-ingestion.ipynb b/notebooks/api-examples/2-data-ingestion.ipynb
index e8bdf80..3d55207 100644
--- a/notebooks/api-examples/2-data-ingestion.ipynb
+++ b/notebooks/api-examples/2-data-ingestion.ipynb
@@ -46,7 +46,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "cell-4",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:30.944268Z",
+ "iopub.status.busy": "2026-05-28T21:48:30.943929Z",
+ "iopub.status.idle": "2026-05-28T21:48:30.994786Z",
+ "shell.execute_reply": "2026-05-28T21:48:30.994490Z"
+ }
+ },
"outputs": [],
"source": [
"import os\n",
@@ -100,7 +107,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "cell-6",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:30.996717Z",
+ "iopub.status.busy": "2026-05-28T21:48:30.996515Z",
+ "iopub.status.idle": "2026-05-28T21:48:30.999550Z",
+ "shell.execute_reply": "2026-05-28T21:48:30.999309Z"
+ }
+ },
"outputs": [],
"source": [
"# Key research papers about RAG, LLMs, and retrieval\n",
@@ -199,7 +213,14 @@
"cell_type": "code",
"execution_count": 3,
"id": "06e7a1ad",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:31.000743Z",
+ "iopub.status.busy": "2026-05-28T21:48:31.000642Z",
+ "iopub.status.idle": "2026-05-28T21:48:51.286597Z",
+ "shell.execute_reply": "2026-05-28T21:48:51.285956Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -221,26 +242,117 @@
"cell_type": "code",
"execution_count": 4,
"id": "cell-7",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:48:51.290107Z",
+ "iopub.status.busy": "2026-05-28T21:48:51.289662Z",
+ "iopub.status.idle": "2026-05-28T21:56:05.222161Z",
+ "shell.execute_reply": "2026-05-28T21:56:05.220388Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Downloading gpt3-language-models.pdf...\n",
- " ✓ Successfully uploaded gpt3-language-models.pdf\n",
- "Downloading rag-retrieval-augmented-generation.pdf...\n",
- " ✓ Successfully uploaded rag-retrieval-augmented-generation.pdf\n",
- "Downloading attention-is-all-you-need.pdf...\n",
- " ✓ Successfully uploaded attention-is-all-you-need.pdf\n",
- "Downloading beir-retrieval-benchmark.pdf...\n",
- " ✓ Successfully uploaded beir-retrieval-benchmark.pdf\n",
- "Downloading dense-passage-retrieval.pdf...\n",
- " ✓ Successfully uploaded dense-passage-retrieval.pdf\n",
- "Downloading hallucination-detection-naacl.pdf...\n",
- " ✓ Successfully uploaded hallucination-detection-naacl.pdf\n",
- "Downloading retrieval-evaluation-metrics.pdf...\n",
- " ✓ Successfully uploaded retrieval-evaluation-metrics.pdf\n",
+ "Downloading gpt3-language-models.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded gpt3-language-models.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading rag-retrieval-augmented-generation.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded rag-retrieval-augmented-generation.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading attention-is-all-you-need.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded attention-is-all-you-need.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading beir-retrieval-benchmark.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded beir-retrieval-benchmark.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading dense-passage-retrieval.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded dense-passage-retrieval.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading hallucination-detection-naacl.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded hallucination-detection-naacl.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading retrieval-evaluation-metrics.pdf...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ Successfully uploaded retrieval-evaluation-metrics.pdf\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"=== Upload Summary ===\n",
"Total: 7, Successful: 7, Failed: 0\n"
@@ -336,7 +448,14 @@
"cell_type": "code",
"execution_count": 5,
"id": "cell-9",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:56:05.226541Z",
+ "iopub.status.busy": "2026-05-28T21:56:05.226207Z",
+ "iopub.status.idle": "2026-05-28T21:56:05.329211Z",
+ "shell.execute_reply": "2026-05-28T21:56:05.328621Z"
+ }
+ },
"outputs": [],
"source": [
"# Install required libraries for web crawling\n",
@@ -358,7 +477,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "cell-10",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:56:05.332867Z",
+ "iopub.status.busy": "2026-05-28T21:56:05.332316Z",
+ "iopub.status.idle": "2026-05-28T21:56:05.346525Z",
+ "shell.execute_reply": "2026-05-28T21:56:05.345790Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -600,7 +726,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "run-crawler",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T21:56:05.348715Z",
+ "iopub.status.busy": "2026-05-28T21:56:05.348493Z",
+ "iopub.status.idle": "2026-05-28T22:00:36.545470Z",
+ "shell.execute_reply": "2026-05-28T22:00:36.544928Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -613,38 +746,194 @@
"\n",
"Starting documentation crawler...\n",
"Max pages: 300 | progress reported every 50 pages\n",
- "\n",
- " [50/300] 49 extracted, 1 skipped, 0 failed\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/api-reference/indexing-apis/indexing\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/api-reference/admin-apis/admin\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/agent-os/agent-tools-overview\n",
- " [100/300] 93 extracted, 4 skipped, 3 failed\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [50/300] 50 extracted, 0 skipped, 0 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/api-reference/indexing-apis/indexing\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/api-reference/admin-apis/admin\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [100/300] 95 extracted, 3 skipped, 2 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/agent-os/agent-tools-overview\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
" ✗ Fetch 404: https://docs.vectara.com/docs/agent/agent-tools\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/docs/agent/sessions\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/agents/agent-tools-overview\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/rest-api/tools/test\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/generation/custom-prompt-templates-customizations\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/introduction\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/deploy-and-scale\n",
- " [150/300] 135 extracted, 5 skipped, 10 failed\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/prompts/docs/learn/grounded-generation/select-a-summarizer\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/security/authorization/vectara-personas-and-access-patterns\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/security/authentication/overview\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/security/authentication/oauth\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/on-prem/guides/vectara-okta-oidc\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/learn/recommendation-systems/recommender-overview\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/quickstart\n",
- " [200/300] 176 extracted, 7 skipped, 17 failed\n",
- " [250/300] 221 extracted, 12 skipped, 17 failed\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/learn/semantic-search/add-custom-dimensions\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/build/prepare-data/metadata-filters-overview\n",
- " ✗ Fetch 404: https://docs.vectara.com/docs/rest-api-queries\n",
- " [300/300] 261 extracted, 19 skipped, 20 failed\n",
+ " ✗ Fetch 404: https://docs.vectara.com/docs/docs/agent/sessions\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/agents/agent-tools-overview\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/rest-api/tools/test\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/generation/custom-prompt-templates-customizations\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [150/300] 138 extracted, 4 skipped, 8 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/rest-api/api-reference\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/platform-architecture/vectara-vs-other-solutions\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/build/prepare-data/metadata-filters-overview\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/agents/subagent-tool\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/prompts/docs/learn/grounded-generation/select-a-summarizer\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/security/authorization/vectara-personas-and-access-patterns\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/security/authentication/overview\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/security/authentication/oauth\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [200/300] 177 extracted, 7 skipped, 16 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/on-prem/guides/vectara-okta-oidc\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/learn/recommendation-systems/recommender-overview\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/quickstart\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [250/300] 219 extracted, 12 skipped, 19 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✗ Fetch 404: https://docs.vectara.com/docs/learn/semantic-search/add-custom-dimensions\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [300/300] 268 extracted, 12 skipped, 20 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"=== Crawling Complete ===\n",
"Pages visited: 300\n",
- "Documents scraped: 261\n",
- "Skipped: 19 Failed: 20\n",
+ "Documents scraped: 268\n",
+ "Skipped: 12 Failed: 20\n",
"\n",
"Sample pages:\n",
" - About Vectara (general)\n",
@@ -696,7 +985,14 @@
"cell_type": "code",
"execution_count": 8,
"id": "3847d051",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:00:36.549228Z",
+ "iopub.status.busy": "2026-05-28T22:00:36.548983Z",
+ "iopub.status.idle": "2026-05-28T22:03:47.034144Z",
+ "shell.execute_reply": "2026-05-28T22:03:47.033067Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -718,22 +1014,65 @@
"cell_type": "code",
"execution_count": 9,
"id": "cell-12",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:03:47.039744Z",
+ "iopub.status.busy": "2026-05-28T22:03:47.039316Z",
+ "iopub.status.idle": "2026-05-28T22:13:28.962838Z",
+ "shell.execute_reply": "2026-05-28T22:13:28.960758Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Indexing scraped documentation... | progress reported every 50 pages\n",
- "\n",
- " [50/261] 50 indexed, 0 failed\n",
- " [100/261] 100 indexed, 0 failed\n",
- " [150/261] 150 indexed, 0 failed\n",
- " [200/261] 200 indexed, 0 failed\n",
- " [250/261] 250 indexed, 0 failed\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [50/268] 50 indexed, 0 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [100/268] 100 indexed, 0 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [150/268] 150 indexed, 0 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [200/268] 200 indexed, 0 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [250/268] 250 indexed, 0 failed\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"=== Indexing Summary ===\n",
- "Total: 261, Successful: 261, Failed: 0\n"
+ "Total: 268, Successful: 268, Failed: 0\n"
]
}
],
@@ -820,7 +1159,14 @@
"cell_type": "code",
"execution_count": 10,
"id": "verify-code-cell",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:13:28.968980Z",
+ "iopub.status.busy": "2026-05-28T22:13:28.968555Z",
+ "iopub.status.idle": "2026-05-28T22:13:30.688034Z",
+ "shell.execute_reply": "2026-05-28T22:13:30.687498Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -828,45 +1174,57 @@
"text": [
"=== AI Research Papers Corpus ===\n",
"Expected: 7 documents\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"Actual: 7 documents indexed\n",
"\n",
"Indexed papers:\n",
- " • Attention Is All You Need\n",
- " • Efficiently Teaching an Effective Dense Retriever with Balanced Topic Aware Sampling\n",
" • Language Models are Few-Shot Learners\n",
+ " • Attention Is All You Need\n",
" • Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\n",
+ " • BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models\n",
+ " • Efficiently Teaching an Effective Dense Retriever with Balanced Topic Aware Sampling\n",
" • Hallucination Detection in RAG Systems\n",
" • Retrieval Evaluation Metrics and Methods\n",
- " • BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models\n",
"\n",
"✓ All 7 papers successfully indexed\n",
"\n",
"\n",
"=== Vectara Documentation Corpus ===\n",
- "Expected: 261 documents\n",
- "\n",
- "Actual: 261 documents indexed\n",
+ "Expected: 268 documents\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Actual: 268 documents indexed\n",
"\n",
"Sample indexed documentation (showing first 10):\n",
- " • Data ingestion (topic: general, type: guide)\n",
- " • Metadata filters (topic: general, type: guide)\n",
- " • Concepts (topic: general, type: guide)\n",
- " • S3 (topic: general, type: guide)\n",
- " • Hallucination evaluation (topic: general, type: guide)\n",
- " • FAQ and Q&A matching (topic: general, type: guide)\n",
- " • Vectara Postman Collection (topic: general, type: guide)\n",
- " • Vectara and LlamaIndex (topic: indexing, type: guide)\n",
- " • Vectara and Airbyte (topic: general, type: guide)\n",
- " • Security (topic: general, type: guide)\n",
+ " • About Vectara (topic: general, type: guide)\n",
+ " • Vectara Python SDK (topic: general, type: guide)\n",
+ " • Getting started (topic: general, type: guide)\n",
+ " • Quickstart (topic: general, type: guide)\n",
+ " • Build with coding agents (topic: agents, type: guide)\n",
+ " • Try it live — the playground (topic: general, type: guide)\n",
+ " • The application layer (topic: general, type: guide)\n",
+ " • The platform stack (topic: general, type: guide)\n",
+ " • Knowledge (topic: general, type: guide)\n",
+ " • Context & memory (topic: general, type: guide)\n",
"\n",
- "✓ All 261 documentation pages successfully indexed\n",
+ "✓ All 268 documentation pages successfully indexed\n",
"\n",
"\n",
"=== Overall Indexing Summary ===\n",
"Research Papers: 7/7 indexed\n",
- "Documentation: 261/261 indexed\n",
- "Total: 268 documents across both corpora\n"
+ "Documentation: 268/268 indexed\n",
+ "Total: 275 documents across both corpora\n"
]
}
],
diff --git a/notebooks/api-examples/3-document-deletion.ipynb b/notebooks/api-examples/3-document-deletion.ipynb
new file mode 100644
index 0000000..03a10d9
--- /dev/null
+++ b/notebooks/api-examples/3-document-deletion.ipynb
@@ -0,0 +1,621 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "cell-0",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-2",
+ "metadata": {},
+ "source": [
+ "# Deleting Documents from a Corpus\n",
+ "\n",
+ "This notebook shows how to remove documents from a Vectara corpus. It's self-contained: it creates its own corpus, adds a few documents (some with metadata, some without), then demonstrates the three deletion APIs:\n",
+ "\n",
+ "- **Delete one document by ID** — `DELETE /v2/corpora/{corpus_key}/documents/{document_id}` (returns `204`).\n",
+ "- **Bulk delete by metadata or IDs** — `DELETE /v2/corpora/{corpus_key}/documents` with a `metadata_filter` (e.g. `doc.category = 'finance'`) and/or a comma-separated `document_ids` list. It runs asynchronously by default (returns `202` with a `job_id`); pass `async=false` to wait for the result (`200`).\n",
+ "- **Delete everything** — `POST /v2/corpora/{corpus_key}/reset` empties the corpus but keeps it (returns `204`).\n",
+ "\n",
+ "Because this notebook owns its corpus, none of the other notebooks in the series depend on it — you can run it any time."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-3",
+ "metadata": {},
+ "source": [
+ "## About Vectara\n",
+ "\n",
+ "[Vectara](https://vectara.com/) is an Agent Platform for trusted enterprise AI — a unified Agentic RAG platform with built-in retrieval, orchestration, and governance. See [Notebook 1](1-corpus-creation.ipynb) for the full overview of features and deployment options."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-4",
+ "metadata": {},
+ "source": [
+ "## Setup\n",
+ "\n",
+ "This notebook only needs a `VECTARA_API_KEY` environment variable set to your personal API key."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "code-5",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:07.527041Z",
+ "iopub.status.busy": "2026-05-28T23:28:07.526774Z",
+ "iopub.status.idle": "2026-05-28T23:28:07.592775Z",
+ "shell.execute_reply": "2026-05-28T23:28:07.592547Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import json\n",
+ "import requests\n",
+ "from time import sleep\n",
+ "\n",
+ "api_key = os.environ['VECTARA_API_KEY']\n",
+ "BASE_URL = \"https://api.vectara.io/v2\"\n",
+ "\n",
+ "# Common headers (Content-Type is needed for corpus creation and indexing;\n",
+ "# it's harmless on the GET/DELETE/reset calls).\n",
+ "headers = {\n",
+ " \"x-api-key\": api_key,\n",
+ " \"Content-Type\": \"application/json\",\n",
+ "}\n",
+ "\n",
+ "# Dedicated corpus for this tutorial.\n",
+ "CORPUS_KEY = \"tutorial-document-deletion\"\n",
+ "\n",
+ "\n",
+ "def list_documents(metadata_filter=None):\n",
+ " \"\"\"Return all documents in CORPUS_KEY, paging through the list endpoint.\n",
+ "\n",
+ " Pass ``metadata_filter`` (e.g. \"doc.category = 'finance'\") to list only the\n",
+ " documents that match — the list endpoint accepts the same filter syntax as\n",
+ " the delete and query APIs.\n",
+ " \"\"\"\n",
+ " documents = []\n",
+ " page_key = None\n",
+ " while True:\n",
+ " params = {\"limit\": 100}\n",
+ " if page_key:\n",
+ " params[\"page_key\"] = page_key\n",
+ " if metadata_filter:\n",
+ " params[\"metadata_filter\"] = metadata_filter\n",
+ " resp = requests.get(\n",
+ " f\"{BASE_URL}/corpora/{CORPUS_KEY}/documents\",\n",
+ " headers=headers,\n",
+ " params=params,\n",
+ " timeout=30,\n",
+ " )\n",
+ " resp.raise_for_status()\n",
+ " data = resp.json()\n",
+ " documents.extend(data.get(\"documents\", []))\n",
+ " page_key = data.get(\"metadata\", {}).get(\"page_key\")\n",
+ " if not page_key:\n",
+ " break\n",
+ " return documents\n",
+ "\n",
+ "\n",
+ "def wait_until_count(expected, metadata_filter=None, attempts=10, interval=1):\n",
+ " \"\"\"Re-list until the document count equals ``expected``, then return it.\n",
+ "\n",
+ " A delete can take a moment to be reflected in the list endpoint, so instead\n",
+ " of guessing a fixed sleep we poll: re-list up to ``attempts`` times,\n",
+ " ``interval`` seconds apart, and return as soon as the count matches (or once\n",
+ " the attempts run out). Returns the final document list.\n",
+ " \"\"\"\n",
+ " documents = list_documents(metadata_filter=metadata_filter)\n",
+ " for _ in range(attempts):\n",
+ " if len(documents) == expected:\n",
+ " break\n",
+ " sleep(interval)\n",
+ " documents = list_documents(metadata_filter=metadata_filter)\n",
+ " return documents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-6",
+ "metadata": {},
+ "source": [
+ "## Step 1: Create the corpus\n",
+ "\n",
+ "We create a corpus with two document-level filter attributes, `category` and `year`, so we can later delete documents by metadata. The creation cell is safe to re-run: a `409` response means the corpus already exists, and we simply reuse it. We then reset the corpus so the notebook starts from an empty corpus even on a re-run."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "code-7",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:07.594130Z",
+ "iopub.status.busy": "2026-05-28T23:28:07.594036Z",
+ "iopub.status.idle": "2026-05-28T23:28:22.108460Z",
+ "shell.execute_reply": "2026-05-28T23:28:22.107827Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✓ Created corpus 'tutorial-document-deletion'\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Reset status: 204\n"
+ ]
+ }
+ ],
+ "source": [
+ "corpus_config = {\n",
+ " \"key\": CORPUS_KEY,\n",
+ " \"name\": \"Document Deletion Demo\",\n",
+ " \"description\": \"Self-contained corpus for the document-deletion tutorial\",\n",
+ " \"encoder_name\": \"boomerang-2023-q3\",\n",
+ " \"filter_attributes\": [\n",
+ " {\n",
+ " \"name\": \"category\",\n",
+ " \"level\": \"document\",\n",
+ " \"description\": \"Document category (e.g., news, finance)\",\n",
+ " \"indexed\": True,\n",
+ " \"type\": \"text\",\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"year\",\n",
+ " \"level\": \"document\",\n",
+ " \"description\": \"Publication year\",\n",
+ " \"indexed\": True,\n",
+ " \"type\": \"integer\",\n",
+ " },\n",
+ " ],\n",
+ "}\n",
+ "\n",
+ "resp = requests.post(f\"{BASE_URL}/corpora\", headers=headers, json=corpus_config, timeout=30)\n",
+ "if resp.status_code == 201:\n",
+ " print(f\"✓ Created corpus '{CORPUS_KEY}'\")\n",
+ "elif resp.status_code == 409:\n",
+ " print(f\"✓ Corpus '{CORPUS_KEY}' already exists — reusing it\")\n",
+ "else:\n",
+ " raise RuntimeError(f\"Error creating corpus: {resp.status_code} - {resp.text}\")\n",
+ "\n",
+ "# Start from a clean slate so counts are predictable on re-runs.\n",
+ "reset = requests.post(f\"{BASE_URL}/corpora/{CORPUS_KEY}/reset\", headers=headers, timeout=60)\n",
+ "print(f\" Reset status: {reset.status_code}\") # 204 = emptied"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-8",
+ "metadata": {},
+ "source": [
+ "## Step 2: Add a few documents\n",
+ "\n",
+ "We index six small documents with the Core Indexing API. Four carry `category` and `year` metadata; two are left without any metadata to show that a metadata filter only affects documents whose metadata matches it — documents with no metadata are never selected."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "code-9",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:22.111685Z",
+ "iopub.status.busy": "2026-05-28T23:28:22.111423Z",
+ "iopub.status.idle": "2026-05-28T23:28:30.883146Z",
+ "shell.execute_reply": "2026-05-28T23:28:30.882579Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ press-2023-vectara (201)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ press-2024-launch (201)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ finance-2023-report (201)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ finance-2024-earnings (201)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ note-alpha (201)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ✓ note-beta (201)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Documents in corpus: 6\n"
+ ]
+ }
+ ],
+ "source": [
+ "documents = [\n",
+ " {\n",
+ " \"id\": \"press-2023-vectara\",\n",
+ " \"type\": \"core\",\n",
+ " \"document_parts\": [{\"text\": \"Vectara announced a new release in 2023.\"}],\n",
+ " \"metadata\": {\"category\": \"news\", \"year\": 2023},\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"press-2024-launch\",\n",
+ " \"type\": \"core\",\n",
+ " \"document_parts\": [{\"text\": \"A major product launch happened in 2024.\"}],\n",
+ " \"metadata\": {\"category\": \"news\", \"year\": 2024},\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"finance-2023-report\",\n",
+ " \"type\": \"core\",\n",
+ " \"document_parts\": [{\"text\": \"The 2023 annual financial report and outlook.\"}],\n",
+ " \"metadata\": {\"category\": \"finance\", \"year\": 2023},\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"finance-2024-earnings\",\n",
+ " \"type\": \"core\",\n",
+ " \"document_parts\": [{\"text\": \"Quarterly earnings figures for 2024.\"}],\n",
+ " \"metadata\": {\"category\": \"finance\", \"year\": 2024},\n",
+ " },\n",
+ " # Two documents without any metadata.\n",
+ " {\n",
+ " \"id\": \"note-alpha\",\n",
+ " \"type\": \"core\",\n",
+ " \"document_parts\": [{\"text\": \"An untagged scratch note about nothing in particular.\"}],\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"note-beta\",\n",
+ " \"type\": \"core\",\n",
+ " \"document_parts\": [{\"text\": \"Another untagged note kept for reference.\"}],\n",
+ " },\n",
+ "]\n",
+ "\n",
+ "for doc in documents:\n",
+ " resp = requests.post(\n",
+ " f\"{BASE_URL}/corpora/{CORPUS_KEY}/documents\",\n",
+ " headers=headers,\n",
+ " json=doc,\n",
+ " timeout=30,\n",
+ " )\n",
+ " ok = resp.status_code in (200, 201)\n",
+ " print(f\" {'✓' if ok else '✗'} {doc['id']} ({resp.status_code})\")\n",
+ "\n",
+ "# A freshly-indexed document is searchable right away but can take a moment to\n",
+ "# show up in the list endpoint, so wait until all of them are listed.\n",
+ "indexed = wait_until_count(len(documents))\n",
+ "print(f\"\\nDocuments in corpus: {len(indexed)}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-10",
+ "metadata": {},
+ "source": [
+ "## Inspect the documents\n",
+ "\n",
+ "List the documents so we can see their IDs and metadata. The document ID is what you pass to the single-document delete endpoint."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "code-11",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:30.885336Z",
+ "iopub.status.busy": "2026-05-28T23:28:30.885137Z",
+ "iopub.status.idle": "2026-05-28T23:28:31.264904Z",
+ "shell.execute_reply": "2026-05-28T23:28:31.264373Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "6 documents in 'tutorial-document-deletion':\n",
+ "\n",
+ " • id=press-2023-vectara category=news year=2023\n",
+ " • id=note-alpha category=- year=-\n",
+ " • id=note-beta category=- year=-\n",
+ " • id=press-2024-launch category=news year=2024\n",
+ " • id=finance-2023-report category=finance year=2023\n",
+ " • id=finance-2024-earnings category=finance year=2024\n"
+ ]
+ }
+ ],
+ "source": [
+ "docs = wait_until_count(len(documents))\n",
+ "print(f\"{len(docs)} documents in '{CORPUS_KEY}':\\n\")\n",
+ "for d in docs:\n",
+ " md_ = d.get(\"metadata\", {})\n",
+ " print(f\" • id={d['id']:24} category={md_.get('category', '-'):8} year={md_.get('year', '-')}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-12",
+ "metadata": {},
+ "source": [
+ "### 1. Delete a specific document by ID\n",
+ "\n",
+ "We delete `note-alpha` with the single-document delete endpoint. To remove several known IDs in one call, use the bulk endpoint with `document_ids=id1,id2,...` instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "code-13",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:31.267809Z",
+ "iopub.status.busy": "2026-05-28T23:28:31.267533Z",
+ "iopub.status.idle": "2026-05-28T23:28:32.153575Z",
+ "shell.execute_reply": "2026-05-28T23:28:32.153089Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Delete 'note-alpha' status: 204\n",
+ "Documents now: 5 (was 6)\n",
+ "✓ 'note-alpha' is gone\n"
+ ]
+ }
+ ],
+ "source": [
+ "target_id = \"note-alpha\"\n",
+ "resp = requests.delete(\n",
+ " f\"{BASE_URL}/corpora/{CORPUS_KEY}/documents/{target_id}\",\n",
+ " headers=headers,\n",
+ " timeout=30,\n",
+ ")\n",
+ "print(f\"Delete '{target_id}' status: {resp.status_code}\") # 204 = deleted\n",
+ "\n",
+ "remaining = wait_until_count(len(docs) - 1)\n",
+ "print(f\"Documents now: {len(remaining)} (was {len(docs)})\")\n",
+ "assert target_id not in {d['id'] for d in remaining}, \"document was not deleted\"\n",
+ "print(f\"✓ '{target_id}' is gone\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-14",
+ "metadata": {},
+ "source": [
+ "### 2. Delete documents by metadata filter\n",
+ "\n",
+ "The bulk delete endpoint accepts a `metadata_filter` using the same `doc.` syntax as a query filter. Here we delete every document whose `category` is `finance`. We pass `async=false` so the call waits for the deletion to finish and returns the final result (HTTP 200) instead of returning immediately (HTTP 202) with a `job_id` to poll.\n",
+ "\n",
+ "Note that the one untagged note still in the corpus (`note-beta`) has no `category` field, so the filter never matches it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "code-15",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:32.156067Z",
+ "iopub.status.busy": "2026-05-28T23:28:32.155688Z",
+ "iopub.status.idle": "2026-05-28T23:28:35.039705Z",
+ "shell.execute_reply": "2026-05-28T23:28:35.038959Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2 documents match doc.category = 'finance':\n",
+ " • finance-2023-report\n",
+ " • finance-2024-earnings\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Bulk delete status: 200\n",
+ "{\n",
+ " \"response_type\": \"success\",\n",
+ " \"job_id\": \"job_SDIzYktHMzNHMlJpZkzTS1pH6sgmcyW5XtEaReE/K9iMZg==\",\n",
+ " \"cutoff_timestamp\": \"2026-05-28T23:28:32.552Z\",\n",
+ " \"deleted_count\": 2,\n",
+ " \"skipped_count\": 0\n",
+ "}\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Matching 'doc.category = 'finance'' after delete: 0\n",
+ "Total documents now: 3 (the untagged notes are untouched)\n"
+ ]
+ }
+ ],
+ "source": [
+ "metadata_filter = \"doc.category = 'finance'\"\n",
+ "\n",
+ "# Show which documents match before deleting.\n",
+ "matches = list_documents(metadata_filter=metadata_filter)\n",
+ "print(f\"{len(matches)} documents match {metadata_filter}:\")\n",
+ "for d in matches:\n",
+ " print(f\" • {d['id']}\")\n",
+ "\n",
+ "# Bulk delete them. async=false -> wait for completion and return results (200)\n",
+ "# instead of returning a job_id immediately (202).\n",
+ "resp = requests.delete(\n",
+ " f\"{BASE_URL}/corpora/{CORPUS_KEY}/documents\",\n",
+ " headers=headers,\n",
+ " params={\"metadata_filter\": metadata_filter, \"async\": \"false\"},\n",
+ " timeout=120,\n",
+ ")\n",
+ "print(f\"\\nBulk delete status: {resp.status_code}\")\n",
+ "try:\n",
+ " print(json.dumps(resp.json(), indent=2))\n",
+ "except ValueError:\n",
+ " pass # empty body\n",
+ "\n",
+ "# The list endpoint can lag a moment behind the delete, so poll until the\n",
+ "# filtered set is empty rather than guessing a fixed wait.\n",
+ "after = wait_until_count(0, metadata_filter=metadata_filter)\n",
+ "total = list_documents()\n",
+ "print(f\"\\nMatching '{metadata_filter}' after delete: {len(after)}\")\n",
+ "print(f\"Total documents now: {len(total)} (the untagged notes are untouched)\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-16",
+ "metadata": {},
+ "source": [
+ "### 3. Delete all documents (reset the corpus)\n",
+ "\n",
+ "To remove everything at once, reset the corpus. This deletes all documents and their data but keeps the corpus itself along with its configuration (encoder, filter attributes)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "code-17",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:35.042186Z",
+ "iopub.status.busy": "2026-05-28T23:28:35.041945Z",
+ "iopub.status.idle": "2026-05-28T23:28:43.595662Z",
+ "shell.execute_reply": "2026-05-28T23:28:43.594953Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reset status: 204\n",
+ "Documents after reset: 0\n",
+ "✓ Corpus is empty\n"
+ ]
+ }
+ ],
+ "source": [
+ "resp = requests.post(\n",
+ " f\"{BASE_URL}/corpora/{CORPUS_KEY}/reset\",\n",
+ " headers=headers,\n",
+ " timeout=60,\n",
+ ")\n",
+ "print(f\"Reset status: {resp.status_code}\") # 204 = corpus emptied\n",
+ "\n",
+ "# Poll until the corpus reports empty (reset can take a moment to reflect).\n",
+ "remaining = wait_until_count(0)\n",
+ "print(f\"Documents after reset: {len(remaining)}\")\n",
+ "assert len(remaining) == 0, f\"expected empty corpus, found {len(remaining)}\"\n",
+ "print(\"✓ Corpus is empty\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "md-18",
+ "metadata": {},
+ "source": [
+ "### Clean up (optional)\n",
+ "\n",
+ "The corpus is now empty but still exists. If you don't need it anymore, delete the corpus itself."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "code-19",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T23:28:43.598394Z",
+ "iopub.status.busy": "2026-05-28T23:28:43.597935Z",
+ "iopub.status.idle": "2026-05-28T23:28:47.359885Z",
+ "shell.execute_reply": "2026-05-28T23:28:47.359388Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Delete corpus status: 204\n"
+ ]
+ }
+ ],
+ "source": [
+ "resp = requests.delete(f\"{BASE_URL}/corpora/{CORPUS_KEY}\", headers=headers, timeout=30)\n",
+ "print(f\"Delete corpus status: {resp.status_code}\") # 204 = deleted"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/api-examples/4-agent-api.ipynb b/notebooks/api-examples/4-agent-api.ipynb
deleted file mode 100644
index d15c1bb..0000000
--- a/notebooks/api-examples/4-agent-api.ipynb
+++ /dev/null
@@ -1,657 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Vectara Agent API Examples\n",
- "\n",
- "This notebook demonstrates the basics of how to use Vectara's Agent REST APIs directly to create and interact with AI agents.\n",
- "\n",
- "You'll learn how to:\n",
- "1. Create an agent with custom instructions\n",
- "2. Create agent sessions for conversations\n",
- "3. Send messages to agents and get responses\n",
- "4. Use streaming for real-time responses\n",
- "5. Manage conversation history\n",
- "6. Work with tools and tool servers"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## About Vectara\n",
- "\n",
- "[Vectara](https://vectara.com/) is an Agent Platform for trusted enterprise AI — a unified Agentic RAG platform with built-in retrieval, orchestration, and governance. See [Notebook 1](1-corpus-creation.ipynb) for the full overview of features and deployment options."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Getting Started\n",
- "\n",
- "This notebook assumes you've completed Notebooks 1-2, and potentially 3:\n",
- "- Notebook 1: Created two corpora (ai-research-papers and vectara-docs)\n",
- "- Notebook 2: Ingested AI research papers and Vectara documentation\n",
- "- Notebook 3: Queried the data with various techniques\n",
- "\n",
- "Now we'll create agents that can autonomously search and reason across this data."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Setup"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m26.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1.1\u001b[0m\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
- ]
- }
- ],
- "source": [
- "!pip install -q sseclient-py"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import requests\n",
- "import json\n",
- "import uuid\n",
- "from datetime import datetime\n",
- "\n",
- "# Get credentials from environment variables\n",
- "api_key = os.environ['VECTARA_API_KEY']\n",
- "\n",
- "# Get corpus keys from environment (from Notebook 1)\n",
- "research_corpus_key = 'tutorial-ai-research-papers'\n",
- "docs_corpus_key = 'tutorial-vectara-docs'\n",
- "\n",
- "# Base API URL\n",
- "BASE_URL = \"https://api.vectara.io/v2\"\n",
- "\n",
- "# Common headers\n",
- "headers = {\n",
- " \"x-api-key\": api_key,\n",
- " \"Content-Type\": \"application/json\"\n",
- "}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Step 1: Create a Basic Agent\n",
- "\n",
- "Create an agent with custom instructions that can search your corpus:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Checking if agent 'RAG Research Assistant' already exists...\n",
- "✓ Agent already exists!\n",
- " Agent Key: agt_rag_research_assistant_4627\n",
- " Agent Name: RAG Research Assistant\n",
- " Steps: ['first_step']\n"
- ]
- }
- ],
- "source": [
- "# Define agent configuration - this agent can access both corpora\n",
- "# Agent structure uses steps with instructions and tool configurations\n",
- "agent_name = \"RAG Research Assistant\"\n",
- "agent_config = {\n",
- " \"name\": agent_name,\n",
- " \"description\": \"Agent that can answer questions about RAG, embeddings, and retrieval from both research papers and documentation\",\n",
- " \"model\": { \"name\": \"gpt-4o\" },\n",
- " \"first_step_name\": \"main\",\n",
- " \"steps\": {\n",
- " \"main\": {\n",
- " \"instructions\": [\n",
- " {\n",
- " \"type\": \"inline\",\n",
- " \"name\": \"first set of instructions\",\n",
- " \"template\": \"\"\"\n",
- "You are an expert AI research assistant specializing in Retrieval Augmented Generation and AI Agents. \n",
- "You have access to both academic research papers and Vectara's technical documentation. \n",
- "Provide clear, accurate answers with citations. \n",
- "When answering, combine theoretical insights from research with practical implementation guidance from documentation.\n",
- " \"\"\"\n",
- " }\n",
- " ],\n",
- " \"output_parser\": {\"type\": \"default\"}\n",
- " }\n",
- " },\n",
- " \n",
- " \"tool_configurations\": {\n",
- " \"research_paper_search\": {\n",
- " \"type\": \"corpora_search\",\n",
- " \"query_configuration\": {\n",
- " \"search\": {\n",
- " \"corpora\": [\n",
- " {\n",
- " \"corpus_key\": research_corpus_key\n",
- " }\n",
- " ]\n",
- " },\n",
- " \"generation\": {\n",
- " \"generation_preset_name\": \"vectara-summary-table-md-query-ext-jan-2025-gpt-4o\",\n",
- " \"model_parameters\": {\n",
- " \"llm_name\": \"gpt-4o\",\n",
- " \"temperature\": 0.0\n",
- " }\n",
- " }\n",
- " }\n",
- " },\n",
- " \"vectara_doc_search\": {\n",
- " \"type\": \"corpora_search\",\n",
- " \"query_configuration\": {\n",
- " \"search\": {\n",
- " \"corpora\": [\n",
- " {\n",
- " \"corpus_key\": docs_corpus_key\n",
- " }\n",
- " ]\n",
- " },\n",
- " \"generation\": {\n",
- " \"generation_preset_name\": \"vectara-summary-table-md-query-ext-jan-2025-gpt-4o\",\n",
- " \"model_parameters\": {\n",
- " \"llm_name\": \"gpt-4o\",\n",
- " \"temperature\": 0.0\n",
- " }\n",
- " }\n",
- " }\n",
- " }\n",
- "\n",
- " }\n",
- "}\n",
- "\n",
- "# Inlined here to show the pagination pattern explicitly; notebooks 5+ import\n",
- "# the same helper as vectara_utils.find_agents_by_name.\n",
- "def find_agent_by_name(name):\n",
- " \"\"\"Find an agent by name, handling pagination.\"\"\"\n",
- " page_key = None\n",
- " while True:\n",
- " params = {'limit': 100}\n",
- " if page_key:\n",
- " params['page_key'] = page_key\n",
- " resp = requests.get(f\"{BASE_URL}/agents\", headers=headers, params=params)\n",
- " if resp.status_code != 200:\n",
- " break\n",
- " data = resp.json()\n",
- " for agent in data.get('agents', []):\n",
- " if agent.get('name') == name:\n",
- " return agent\n",
- " page_key = data.get('metadata', {}).get('page_key')\n",
- " if not page_key:\n",
- " break\n",
- " return None\n",
- "\n",
- "# Check if agent already exists\n",
- "print(f\"Checking if agent '{agent_name}' already exists...\")\n",
- "agent_key = None\n",
- "existing = find_agent_by_name(agent_name)\n",
- "if existing:\n",
- " agent_key = existing['key']\n",
- " print(f\"✓ Agent already exists!\")\n",
- " print(f\" Agent Key: {agent_key}\")\n",
- " print(f\" Agent Name: {existing['name']}\")\n",
- " print(f\" Steps: {list(existing.get('steps', {}).keys())}\")\n",
- "\n",
- "# Create the agent only if it doesn't exist\n",
- "if not agent_key:\n",
- " print(f\"Creating new agent '{agent_name}'...\")\n",
- " response = requests.post(f\"{BASE_URL}/agents\", headers=headers, json=agent_config)\n",
- " \n",
- " print(f\"Status Code: {response.status_code}\")\n",
- " if response.status_code == 201:\n",
- " agent_data = response.json()\n",
- " agent_key = agent_data[\"key\"]\n",
- " print(f\"✓ Agent Created!\")\n",
- " print(f\" Agent Key: {agent_key}\")\n",
- " print(f\" Agent Name: {agent_data['name']}\")\n",
- " print(f\" Steps: {list(agent_data.get('steps', {}).keys())}\")\n",
- " elif response.status_code == 409:\n",
- " # Agent was created between our check and create call; look it up again\n",
- " existing = find_agent_by_name(agent_name)\n",
- " if existing:\n",
- " agent_key = existing['key']\n",
- " print(f\"✓ Agent already exists!\")\n",
- " print(f\" Agent Key: {agent_key}\")\n",
- " else:\n",
- " print(f\"Error: Agent exists per API but could not be found in listing\")\n",
- " else:\n",
- " print(f\"Error: {response.text}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Step 2: Create an Agent Session\n",
- "\n",
- "Sessions maintain conversation context across multiple turns:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Status Code: 201\n",
- "✓ Session Created!\n",
- " Session Name: Technical Support Chat 20260506-061509\n",
- " Session Key: ase_technical_support_chat_20260506-061509_096c\n"
- ]
- }
- ],
- "source": [
- "# Create a new session with a unique name to allow reruns\n",
- "session_name = f\"Technical Support Chat {datetime.now().strftime('%Y%m%d-%H%M%S')}\"\n",
- "session_config = {\n",
- " \"name\": session_name,\n",
- " \"metadata\": {\n",
- " \"user_type\": \"developer\",\n",
- " \"session_purpose\": \"api_questions\"\n",
- " }\n",
- "}\n",
- "\n",
- "url = f\"{BASE_URL}/agents/{agent_key}/sessions\"\n",
- "response = requests.post(url, headers=headers, json=session_config)\n",
- "\n",
- "print(f\"Status Code: {response.status_code}\")\n",
- "if response.status_code == 201:\n",
- " session_data = response.json()\n",
- " session_key = session_data[\"key\"]\n",
- " print(f\"✓ Session Created!\")\n",
- " print(f\" Session Name: {session_name}\")\n",
- " print(f\" Session Key: {session_key}\")\n",
- "else:\n",
- " print(f\"Error: {response.text}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Step 3: Send Messages to the Agent\n",
- "\n",
- "Send a message and get a response:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Agent Response:\n",
- "Retrieval-Augmented Generation (RAG) is a technique that enhances language generation models by integrating retrieval mechanisms. These models combine pre-trained parametric memory, such as neural networks, with non-parametric memory, like external data sources, to improve the quality and factual accuracy of generated content. RAG is particularly valuable in knowledge-intensive NLP tasks, where access to accurate and up-to-date information is crucial [vectara_1].\n",
- "\n",
- "To implement RAG using Vectara, you can utilize their Query APIs, which support data searching and allow for AI-powered summary generation. Vectara offers three types of queries catering to various search needs, suitable for applications such as enterprise search, chatbots, or building knowledge bases [vectara_2]. Additionally, Vectara provides a Custom RAG Prompt Engine, allowing developers to customize prompt templates with metadata, enhancing the generation process [vectara_4]. This approach can significantly improve the reliability and trustworthiness of AI models by grounding generated responses in actual data [vectara_3].\n"
- ]
- }
- ],
- "source": [
- "# Send a message to the agent\n",
- "# The correct format uses a messages array with message objects\n",
- "message_data = {\n",
- " \"messages\": [\n",
- " {\n",
- " \"type\": \"text\",\n",
- " \"content\": \"What is retrieval augmented generation and how can I implement it with Vectara?\"\n",
- " }\n",
- " ],\n",
- " \"stream_response\": False\n",
- "}\n",
- "\n",
- "url = f\"{BASE_URL}/agents/{agent_key}/sessions/{session_key}/events\"\n",
- "response = requests.post(url, headers=headers, json=message_data)\n",
- "\n",
- "if response.status_code == 201:\n",
- " event_data = response.json()\n",
- " print(f\"\\nAgent Response:\")\n",
- " # The response typically contains the assistant's message in the events\n",
- " if 'events' in event_data:\n",
- " for event in event_data['events']:\n",
- " if event.get('type') == 'agent_output':\n",
- " print(event.get('content', 'No content'))\n",
- " else:\n",
- " print(event_data)\n",
- "else:\n",
- " print(f\"Error: {response.text}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Step 4: Multi-Turn Conversation\n",
- "\n",
- "The agent maintains conversation context automatically:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "User: What is hybrid search?\n",
- "\n",
- "Agent Response:\n",
- "Hybrid search is a method that integrates both semantic and lexical searching techniques to enhance the accuracy of information retrieval systems. This approach is particularly useful in scenarios where it is important to preserve exact terms, such as SKUs, part numbers, and internal identifiers. By combining these techniques, hybrid search can leverage the strengths of both neural search, which understands context and semantics, and traditional keyword search, which ensures the retrieval of specific terms or phrases [vectara_1].\n",
- "\n",
- "In Vectara's implementation, hybrid search achieves improved retrieval accuracy by balancing these two methods. This is done by adjusting a parameter called the Lambda option, which allows users to finely tune the balance between purely neural and purely lexical search approaches. The lexical interpolation value can be set between 0.0 (purely neural) and 1.0 (purely lexical), with optimal results often found between 0.01 and 0.1 [vectara_2][vectara_3].\n",
- "\n",
- "================================================================================\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# First message\n",
- "message_1 = {\n",
- " \"messages\": [\n",
- " {\n",
- " \"type\": \"text\",\n",
- " \"content\": \"What is hybrid search?\"\n",
- " }\n",
- " ],\n",
- " \"stream_response\": False\n",
- "}\n",
- "\n",
- "url = f\"{BASE_URL}/agents/{agent_key}/sessions/{session_key}/events\"\n",
- "response = requests.post(url, headers=headers, json=message_1)\n",
- "\n",
- "print(\"User: What is hybrid search?\")\n",
- "\n",
- "if response.status_code == 201:\n",
- " event_data = response.json()\n",
- " print(f\"\\nAgent Response:\")\n",
- " if 'events' in event_data:\n",
- " for event in event_data['events']:\n",
- " if event.get('type') == 'agent_output':\n",
- " print(event.get('content', 'No content'))\n",
- " else:\n",
- " print(event_data)\n",
- "else:\n",
- " print(f\"Error: {response.text}\")\n",
- " \n",
- "print(\"\\n\" + \"=\"*80 + \"\\n\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "User: What are its main benefits?\n",
- "\n",
- "Agent Response:\n",
- "The main benefits of hybrid search include:\n",
- "\n",
- "1. **Enhanced Precision and Recall**: By combining semantic and lexical methods, hybrid search can improve both precision and recall in information retrieval, effectively capturing the context of semantic search while ensuring the inclusion of specific keywords or terms [vectara_1].\n",
- "\n",
- "2. **Flexibility in Retrieval**: Hybrid search offers flexibility in adjusting the emphasis between neural understanding and keyword specificity, allowing for tailored search experiences that meet different user needs or application requirements [vectara_2].\n",
- "\n",
- "3. **Improved Handling of Exact Matches**: It excels in cases where exact matches are crucial, such as with product codes, part numbers, and other identifiers, ensuring these elements are not overlooked in the retrieval process [vectara_1].\n",
- "\n",
- "4. **Balance Between Contextual Understanding and Specificity**: By adjusting parameters like the lexical interpolation value, hybrid search provides a balanced approach that can understand and process the depth and context of queries while also retrieving specific keyword information [vectara_3].\n",
- "\n",
- "5. **Optimized for Diverse Application Scenarios**: This approach is particularly beneficial in sectors that require highly accurate and contextually relevant search results, such as e-commerce (for SKU searches) and enterprise applications (for internal data retrieval) [vectara_1][vectara_3].\n"
- ]
- }
- ],
- "source": [
- "# Follow-up message (agent remembers context)\n",
- "message_2 = {\n",
- " \"messages\": [\n",
- " {\n",
- " \"type\": \"text\",\n",
- " \"content\": \"What are its main benefits?\"\n",
- " }\n",
- " ],\n",
- " \"stream_response\": False\n",
- "}\n",
- "\n",
- "response = requests.post(url, headers=headers, json=message_2)\n",
- "\n",
- "print(\"User: What are its main benefits?\")\n",
- "if response.status_code == 201:\n",
- " event_data = response.json()\n",
- " print(f\"\\nAgent Response:\")\n",
- " if 'events' in event_data:\n",
- " for event in event_data['events']:\n",
- " if event.get('type') == 'agent_output':\n",
- " print(event.get('content', 'No content'))\n",
- " else:\n",
- " print(event_data)\n",
- "else:\n",
- " print(f\"Error: {response.text}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "User: Can you give me an example?\n",
- "\n",
- "Agent Response:\n",
- "Certainly! Here’s a simple example to illustrate how hybrid search functions:\n",
- "\n",
- "Imagine you are searching for a specific type of electronic component in an enterprise inventory system:\n",
- "\n",
- "**Search Query**: \"4GB RAM DDR4 SKU12345\"\n",
- "\n",
- "**Pure Semantic Search**:\n",
- "- Focuses on understanding the context and relation between the terms \"4GB RAM\" and \"DDR4\".\n",
- "- It might return results related to various RAM specifications and types but may overlook the exact SKU identifier.\n",
- "\n",
- "**Pure Lexical (Keyword) Search**:\n",
- "- Primarily looks for the exact match of the terms in the query including \"SKU12345\".\n",
- "- It ensures that only entries containing this specific SKU are returned, potentially missing context where similar items exist but do not share the SKU identifier.\n",
- "\n",
- "**Hybrid Search**:\n",
- "- Combines both approaches: it comprehends the contextual relevance of \"4GB RAM\" and \"DDR4\" while ensuring the exact match of \"SKU12345\" is considered.\n",
- "- The result is a more comprehensive search return that not only targets the exact product you might be looking for but also presents related options with similar specifications, enhancing decision-making.\n",
- "\n",
- "This balanced approach ensures that you get the most relevant results, capturing both the nuanced details and the specific needs of your query.\n"
- ]
- }
- ],
- "source": [
- "# Another follow-up\n",
- "message_3 = {\n",
- " \"messages\": [\n",
- " {\n",
- " \"type\": \"text\",\n",
- " \"content\": \"Can you give me an example?\"\n",
- " }\n",
- " ],\n",
- " \"stream_response\": False\n",
- "}\n",
- "\n",
- "response = requests.post(url, headers=headers, json=message_3)\n",
- "\n",
- "print(\"User: Can you give me an example?\")\n",
- "if response.status_code == 201:\n",
- " event_data = response.json()\n",
- " print(f\"\\nAgent Response:\")\n",
- " # The response typically contains the assistant's message in the events\n",
- " if 'events' in event_data:\n",
- " for event in event_data['events']:\n",
- " if event.get('type') == 'agent_output':\n",
- " print(event.get('content', 'No content'))\n",
- " else:\n",
- " print(event_data)\n",
- "else:\n",
- " print(f\"Error: {response.text}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Step 5: Streaming Responses\n",
- "\n",
- "Vectara's Agent API supports Server-Sent Events (SSE) for streaming responses in real time. Instead of waiting for the complete response, you can process text chunks, tool calls, and thinking events as they arrive. Set `stream_response: True` in your message payload to enable streaming."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Streaming response:\n",
- "\n",
- "Embeddings in retrieval systems are crucial for transforming text data into a dense, numerical format that can be easily processed by machine learning algorithms. Here's a brief explanation of how embeddings work in these systems:\n",
- "\n",
- "1. **Transformation of Text to Vectors**: Embeddings convert words or phrases into fixed-sized vector representations. These vectors are high-dimensional, capturing semantic meanings and relationships between words beyond their literal text form.\n",
- "\n",
- "2. **Semantic Understanding**: Unlike traditional keyword matching, embeddings enable the retrieval system to understand the semantic similarity between queries and documents. For instance, embeddings can recognize that \"car\" and \"automobile\" are similar concepts even if the exact words don’t appear in both the query and the document.\n",
- "\n",
- "3. **Efficient Similarity Matching**: By representing both queries and documents as vectors in the same space, the retrieval system can efficiently calculate similarities between them using mathematical operations like cosine similarity. This enables fast and accurate retrieval based on conceptual similarity.\n",
- "\n",
- "4. **Contextual Sensitivity**: Modern embeddings, often generated by deep learning models like BERT, take into account the context in which words appear, allowing retrieval systems to distinguish between different meanings of the same word depending on its usage.\n",
- "\n",
- "5. **Scalable Indexing and Retrieval**: Once documents and queries are transformed into embeddings, indexing the data becomes more scalable, and the retrieval process can quickly traverse the embeddings space to find relevant documents.\n",
- "\n",
- "Overall, embeddings enhance retrieval systems by allowing them to perform contextually aware searches, improving both the relevance and accuracy of returned results.\n",
- "\n",
- "--- Stream complete ---\n"
- ]
- }
- ],
- "source": [
- "import sseclient\n",
- "\n",
- "# Send a message with streaming enabled\n",
- "message_data = {\n",
- " \"messages\": [\n",
- " {\n",
- " \"type\": \"text\",\n",
- " \"content\": \"Briefly explain how embeddings work in retrieval systems.\"\n",
- " }\n",
- " ],\n",
- " \"stream_response\": True\n",
- "}\n",
- "\n",
- "url = f\"{BASE_URL}/agents/{agent_key}/sessions/{session_key}/events\"\n",
- "response = requests.post(url, headers=headers, json=message_data, stream=True)\n",
- "\n",
- "print(\"Streaming response:\\n\")\n",
- "\n",
- "# Streaming endpoints return 200 OK (not 201 Created like the non-streaming create-event calls above)\n",
- "if response.ok:\n",
- " client = sseclient.SSEClient(response)\n",
- " for sse_event in client.events():\n",
- " try:\n",
- " event = json.loads(sse_event.data)\n",
- " except json.JSONDecodeError:\n",
- " continue\n",
- " event_type = event.get(\"type\", \"\")\n",
- "\n",
- " if event_type == \"streaming_agent_output\":\n",
- " # Print each text chunk as it arrives\n",
- " print(event.get(\"content\", \"\"), end=\"\", flush=True)\n",
- " elif event_type == \"streaming_agent_output_end\":\n",
- " print(\"\\n\\n--- Stream complete ---\")\n",
- " elif event_type == \"tool_input\":\n",
- " tool = event.get(\"tool_configuration_name\", \"unknown\")\n",
- " print(f\"[Calling tool: {tool}]\")\n",
- " elif event_type == \"tool_output\":\n",
- " tool = event.get(\"tool_configuration_name\", \"unknown\")\n",
- " print(f\"[Tool response received: {tool}]\")\n",
- " elif event_type == \"thinking\":\n",
- " print(f\"[Thinking: {event.get('content', '')[:80]}...]\")\n",
- "else:\n",
- " print(f\"Error {response.status_code}: {response.text}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/notebooks/api-examples/3-query-api.ipynb b/notebooks/api-examples/4-query-api.ipynb
similarity index 58%
rename from notebooks/api-examples/3-query-api.ipynb
rename to notebooks/api-examples/4-query-api.ipynb
index 93b2d36..ec2af05 100644
--- a/notebooks/api-examples/3-query-api.ipynb
+++ b/notebooks/api-examples/4-query-api.ipynb
@@ -5,7 +5,7 @@
"id": "adf7d63d",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -49,7 +49,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "6019e01a",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:15.501953Z",
+ "iopub.status.busy": "2026-05-28T22:14:15.501693Z",
+ "iopub.status.idle": "2026-05-28T22:14:15.550851Z",
+ "shell.execute_reply": "2026-05-28T22:14:15.550644Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -105,7 +112,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "40947545",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:15.552145Z",
+ "iopub.status.busy": "2026-05-28T22:14:15.552058Z",
+ "iopub.status.idle": "2026-05-28T22:14:21.428797Z",
+ "shell.execute_reply": "2026-05-28T22:14:21.427957Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -113,9 +127,9 @@
"text": [
"\n",
"=== Generated Summary ===\n",
- "Retrieval augmented generation (RAG) is a method that combines pre-trained parametric memory models, such as sequence-to-sequence (seq2seq) transformers, with non-parametric memory, which is typically a dense vector index of external data sources like Wikipedia. This approach uses a pre-trained neural retriever to access the non-parametric memory, allowing the model to retrieve relevant information to augment the generation process. RAG models are trained end-to-end and can condition on retrieved passages to enhance the generation of language tasks, providing a balance between the flexibility of parametric models and the performance of retrieval-based approaches [1], [2].\n",
+ "Retrieval augmented generation (RAG) is a method that combines pre-trained parametric memory models, such as sequence-to-sequence (seq2seq) transformers, with non-parametric memory, which is typically a dense vector index of external data sources like Wikipedia. This approach uses a pre-trained neural retriever to access the non-parametric memory, allowing the model to retrieve relevant information to augment the generation process. RAG models are trained end-to-end and can condition on retrieved passages to enhance the generation of language tasks, offering a blend of the flexibility of parametric models and the performance of retrieval-based approaches [1], [2].\n",
"\n",
- "=== Factual Consistency Score: 0.9921875 ===\n"
+ "=== Factual Consistency Score: 0.9453125 ===\n"
]
}
],
@@ -186,7 +200,14 @@
"cell_type": "code",
"execution_count": 3,
"id": "c154dd4b",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:21.433096Z",
+ "iopub.status.busy": "2026-05-28T22:14:21.432685Z",
+ "iopub.status.idle": "2026-05-28T22:14:21.439224Z",
+ "shell.execute_reply": "2026-05-28T22:14:21.438730Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -198,7 +219,7 @@
"--- Result 1 ---\n",
"Text: but have only explored open-domain extractive question answering. Here, we bring hybrid parametric\n",
"and non-parametric memory to the “workhorse of NLP,” i.e. sequence-to-sequence (seq2seq) models. We e...\n",
- "Score: 0.9823410511016846\n",
+ "Score: 0.9826627969741821\n",
"Document ID: rag-retrieval-augmented-generation.pdf\n",
"Metadata: {'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'CreationDate': 'D:20210413004838Z', 'Keywords': '', 'Producer': 'pdfTeX-1.40.21', 'Author': '', 'Title': '', 'Creator': 'LaTeX with hyperref', 'ModDate': 'D:20210413004838Z', 'Trapped': '/False', 'Subject': '', 'source': 'arxiv', 'year': 2020, 'topic': 'RAG', 'title': 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks', 'authors': 'Lewis et al.'}\n",
"\n",
@@ -206,14 +227,14 @@
"Text: Additionally, providing provenance for their\n",
"decisions and updating their world knowledge remain open research problems. Pre-\n",
"trained models with a differentiable access mechanism to explicit non-para...\n",
- "Score: 0.8845396637916565\n",
+ "Score: 0.8854346871376038\n",
"Document ID: rag-retrieval-augmented-generation.pdf\n",
"Metadata: {'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'CreationDate': 'D:20210413004838Z', 'Keywords': '', 'Producer': 'pdfTeX-1.40.21', 'Author': '', 'Title': '', 'Creator': 'LaTeX with hyperref', 'ModDate': 'D:20210413004838Z', 'Trapped': '/False', 'Subject': '', 'source': 'arxiv', 'year': 2020, 'topic': 'RAG', 'title': 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks', 'authors': 'Lewis et al.'}\n",
"\n",
"--- Result 3 ---\n",
"Text: 1 Introduction arXiv:2005.11401v4 [cs.CL] 12 Apr 2021 Retrieval-Augmented Generation for\n",
" Knowledge-Intensive NLP Tasks Patrick Lewisyz, Ethan Perez? Aleksandra Piktusy, Fabio Petroniy, Vladimir ...\n",
- "Score: 0.8307288885116577\n",
+ "Score: 0.8305426836013794\n",
"Document ID: rag-retrieval-augmented-generation.pdf\n",
"Metadata: {'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'CreationDate': 'D:20210413004838Z', 'Keywords': '', 'Producer': 'pdfTeX-1.40.21', 'Author': '', 'Title': '', 'Creator': 'LaTeX with hyperref', 'ModDate': 'D:20210413004838Z', 'Trapped': '/False', 'Subject': '', 'source': 'arxiv', 'year': 2020, 'topic': 'RAG', 'title': 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks', 'authors': 'Lewis et al.'}\n",
"\n",
@@ -221,7 +242,7 @@
"Text: RAG combines\n",
"the generation flexibility of the “closed-book” (parametric only) approaches and the performance of\n",
"\"open-book\" retrieval-based approaches. Unlike REALM and T5+SSM, RAG enjoys strong resul...\n",
- "Score: 0.8290866017341614\n",
+ "Score: 0.827928364276886\n",
"Document ID: rag-retrieval-augmented-generation.pdf\n",
"Metadata: {'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'CreationDate': 'D:20210413004838Z', 'Keywords': '', 'Producer': 'pdfTeX-1.40.21', 'Author': '', 'Title': '', 'Creator': 'LaTeX with hyperref', 'ModDate': 'D:20210413004838Z', 'Trapped': '/False', 'Subject': '', 'source': 'arxiv', 'year': 2020, 'topic': 'RAG', 'title': 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks', 'authors': 'Lewis et al.'}\n",
"\n",
@@ -229,7 +250,7 @@
"Text: Table 1 shows results for RAG along with state-of-the-art models. On all four open-domain QA\n",
"tasks, RAG sets a new state of the art (only on the T5-comparable split for TQA). RAG combines\n",
"the generati...\n",
- "Score: 0.7932559251785278\n",
+ "Score: 0.7943946719169617\n",
"Document ID: rag-retrieval-augmented-generation.pdf\n",
"Metadata: {'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'CreationDate': 'D:20210413004838Z', 'Keywords': '', 'Producer': 'pdfTeX-1.40.21', 'Author': '', 'Title': '', 'Creator': 'LaTeX with hyperref', 'ModDate': 'D:20210413004838Z', 'Trapped': '/False', 'Subject': '', 'source': 'arxiv', 'year': 2020, 'topic': 'RAG', 'title': 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks', 'authors': 'Lewis et al.'}\n"
]
@@ -264,7 +285,14 @@
"cell_type": "code",
"execution_count": 4,
"id": "21facbac",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:21.442078Z",
+ "iopub.status.busy": "2026-05-28T22:14:21.441806Z",
+ "iopub.status.idle": "2026-05-28T22:14:29.492924Z",
+ "shell.execute_reply": "2026-05-28T22:14:29.492178Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -272,16 +300,18 @@
"text": [
"\n",
"=== Generated Summary (Multiple Corpora) ===\n",
- "Agents in Vectara work by utilizing tools to interact with data and external systems. They use conversational context and instructions to decide which tools to call and how to use the tools' responses to answer user queries. Vectara provides a variety of tools out-of-the-box, and users can also build their own. Agents can search through Vectara corpora to find relevant information, supporting custom search parameters and query configurations [1]. Vectara agents are designed to deliver grounded answers and safe actions with source citations, audit trails, and access controls, ensuring secure and efficient deployment in various environments [2]. Additionally, agents can maintain conversation sessions to remember context across multiple interactions, and they can be configured with custom instructions to control behavior and integrate with external APIs [3].\n",
+ "Agents work with Vectara by utilizing the platform's capabilities to create and manage AI agents with conversational abilities. Vectara provides an API-first platform where agents can be orchestrated using a hosted Agent API. This involves specifying agent instructions, tools, skills, and machine learning models that the agent can use. Agents interact with the platform through session and interaction APIs, allowing them to maintain context across multiple interactions [1], [4].\n",
"\n",
- "=== Factual Consistency Score: 1.0 ===\n",
+ "Agents can integrate with external systems through tools, connectors, and data sources. Tools allow agents to perform actions like API calls during interactions, connectors enable third-party systems to communicate with agents, and data sources feed information into the agent's workflow [2], [9]. Vectara supports various deployment environments, ensuring data security and compliance [8].\n",
+ "\n",
+ "=== Factual Consistency Score: 0.9921875 ===\n",
"\n",
"=== Result Sources ===\n",
- "1. Source: vectara_docs, Title: Built-in tools\n",
- "2. Source: vectara_docs, Title: About Vectara\n",
- "3. Source: vectara_docs, Title: REST APIs\n",
- "4. Source: vectara_docs, Title: About Vectara\n",
- "5. Source: vectara_docs, Title: Agents\n"
+ "1. Source: vectara_docs, Title: Understanding Vectara\n",
+ "2. Source: vectara_docs, Title: Tools & connectors\n",
+ "3. Source: vectara_docs, Title: Understanding Vectara\n",
+ "4. Source: vectara_docs, Title: REST APIs\n",
+ "5. Source: vectara_docs, Title: The application layer\n"
]
}
],
@@ -363,7 +393,14 @@
"cell_type": "code",
"execution_count": 5,
"id": "8fe7a8fe-cfb8-4132-a676-5aa2b38d21d8",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:29.496217Z",
+ "iopub.status.busy": "2026-05-28T22:14:29.495949Z",
+ "iopub.status.idle": "2026-05-28T22:14:35.967182Z",
+ "shell.execute_reply": "2026-05-28T22:14:35.965673Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -371,7 +408,7 @@
"text": [
"\n",
"=== Generated Summary (With Metadata Filter) ===\n",
- "I do not have enough information to answer the question accurately.\n",
+ "The key innovations in retrieval-augmented generation (RAG) include the development of techniques to enhance model reliability and factual consistency, particularly in addressing hallucinations in large language models (LLMs). Notable advancements include the creation of benchmarks and methodologies for evaluating and improving the factual grounding of LLM responses. For instance, the \"Facts Grounding Leaderboard\" benchmarks LLMs' ability to ground responses effectively to long-form inputs [1]. Additionally, the study by Patrick Lewis et al. explores retrieval-augmented generation techniques for knowledge-intensive NLP tasks, which is a significant step in addressing hallucinations and improving the factual accuracy of generated content [2].\n",
"\n",
"=== Number of results: 30 ===\n",
"\n",
@@ -456,276 +493,1329 @@
"cell_type": "code",
"execution_count": 6,
"id": "fe64d30c-eead-4f40-969e-be18033bdea0",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:35.971725Z",
+ "iopub.status.busy": "2026-05-28T22:14:35.971376Z",
+ "iopub.status.idle": "2026-05-28T22:14:40.698807Z",
+ "shell.execute_reply": "2026-05-28T22:14:40.697072Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
- "=== Streaming Response ===\n",
- "To use chunking with Vectara, you can choose between different chunking strategies depending on your needs. By default, Vectara uses sentence-based chunking, which creates one chunk per sentence and is optimal for most datasets. However, for larger documents or when you need to optimize performance, you can explicitly set a chunking strategy. The `max_chars_chunking_strategy` allows you to create larger chunks up to a specified character limit, balancing retrieval speed with contextual coherence [1], [5]. This flexibility in chunking strategies helps maintain semantic integrity across context boundaries while allowing for efficient data ingestion and retrieval [3].\n",
- "\n",
- "=== FCS: 0.92578125 ===\n",
- "\n",
- "\n"
+ "=== Streaming Response ===\n"
]
- }
- ],
- "source": [
- "# Streaming query request - query the documentation corpus\n",
- "streaming_request = {\n",
- " \"query\": \"How do I use chunking with Vectara\",\n",
- " \"stream_response\": True,\n",
- " \"search\": {\n",
- " \"corpora\": [\n",
- " {\n",
- " \"corpus_key\": docs_corpus_key,\n",
- " \"lexical_interpolation\": 0.005\n",
- " }\n",
- " ],\n",
- " \"limit\": 100,\n",
- " \"context_configuration\": {\n",
- " \"sentences_before\": 2,\n",
- " \"sentences_after\": 2\n",
- " },\n",
- " \"reranker\": {\n",
- " \"type\": \"chain\",\n",
- " \"rerankers\": [\n",
- " {\n",
- " \"type\": \"customer_reranker\",\n",
- " \"reranker_name\": \"qwen3-reranker\",\n",
- " \"limit\": 30\n",
- " },\n",
- " {\n",
- " \"type\": \"mmr\",\n",
- " \"diversity_bias\": 0.05\n",
- " }\n",
- " ],\n",
- " }\n",
- " },\n",
- " \"generation\": {\n",
- " \"generation_preset_name\": \"vectara-summary-ext-24-05-med-omni\",\n",
- " \"max_used_search_results\": 15,\n",
- " \"response_language\": \"eng\",\n",
- " \"enable_factual_consistency_score\": True\n",
- " }\n",
- "}\n",
- "\n",
- "# Make streaming request\n",
- "streaming_headers = headers.copy()\n",
- "streaming_headers['Accept'] = 'text/event-stream'\n",
- "\n",
- "response = requests.post(\n",
- " f\"{BASE_URL}/query\",\n",
- " headers=streaming_headers,\n",
- " json=streaming_request,\n",
- " stream=True\n",
- ")\n",
- "\n",
- "print(\"\\n=== Streaming Response ===\")\n",
- "if response.status_code == 200:\n",
- " for line in response.iter_lines():\n",
- " if line:\n",
- " line_str = line.decode('utf-8')\n",
- " if line_str.startswith('data:'):\n",
- " try:\n",
- " data = json.loads(line_str[5:]) # Remove 'data: ' prefix\n",
- " # Handle different event types\n",
- " if data.get('type') == 'generation_chunk':\n",
- " # Print generation text as it arrives\n",
- " print(data.get('generation_chunk', ''), end='', flush=True)\n",
- " elif data.get('type') == 'factual_consistency_score':\n",
- " print(f\"\\n\\n=== FCS: {data.get('factual_consistency_score')} ===\")\n",
- " elif data.get('type') == 'search_results':\n",
- " # Search results arrive before generation starts\n",
- " pass\n",
- " except json.JSONDecodeError:\n",
- " pass\n",
- " print(\"\\n\")\n",
- "else:\n",
- " print(f\"Error: {response.status_code}\")\n",
- " print(response.text)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "t98ft4j3tmg",
- "metadata": {},
- "source": [
- "## Example 5: Custom Prompts with prompt_template\n",
- "\n",
- "Vectara's Prompt Engine allows you to customize the LLM behavior beyond the default presets by using the `prompt_template` parameter. You can use Velocity Template syntax to access variables like:\n",
- "- `$vectaraQuery` - The user's query text\n",
- "- `$vectaraQueryResults` - Array of retrieved search results\n",
- "- `$vectaraLangCode` - ISO639 language code (e.g., \"eng\")\n",
- "- `$vectaraOutChars` - Character limit for output\n",
- "\n",
- "This example demonstrates how to create a custom prompt that formats responses in a specific way."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "5wuzfuoidbb",
- "metadata": {},
- "outputs": [
+ },
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\n",
- "=== Generated Response with Custom Prompt ===\n",
- "**Summary:**\n",
- "Improving retrieval quality in Retrieval-Augmented Generation (RAG) systems involves optimizing the retriever component to ensure it effectively retrieves relevant information for the task at hand. Key strategies include using learned retrieval mechanisms, adjusting the number of retrieved documents, and leveraging the flexibility of non-parametric memory models for dynamic updates.\n",
- "\n",
- "**Key Points:**\n",
- "\n",
- "- **Learned Retrieval:**\n",
- " - RAG systems benefit from learned retrieval mechanisms, which improve results across various tasks compared to fixed retrieval systems like BM25.\n",
- " - The retriever in RAG is initialized using the Dense Passage Retriever (DPR), which is trained with retrieval supervision on datasets like Natural Questions and TriviaQA.\n",
- "\n",
- "- **Comparison with BM25:**\n",
- " - While BM25, a word overlap-based retriever, performs well for entity-centric tasks like FEVER, learned retrieval is crucial for tasks such as Open-Domain QA, where it significantly enhances performance.\n",
- "\n",
- "- **Document Retrieval Flexibility:**\n",
- " - The number of documents retrieved at test time can be adjusted, affecting both performance and runtime. For instance, retrieving more documents generally improves Open-Domain QA results for RAG-Sequence, but performance peaks at a certain point for RAG-Token.\n",
- "\n",
- "- **Non-Parametric Memory Advantages:**\n",
- " - RAG's non-parametric memory allows for easy updates to the knowledge base at test time without retraining, unlike parametric-only models such as T5 or BART, which require further training to adapt to new information.\n",
- "\n",
- "- **Potential for Joint Pre-Training:**\n",
- " - Future improvements could explore joint pre-training of the retriever and generator components, potentially using objectives similar to those in BART, to enhance the interaction between parametric and non-parametric memories.\n",
- "\n",
- "- **Diversity in Generation:**\n",
- " - RAG-Sequence's generations are more diverse compared to RAG-Token and BART, without needing additional diversity-promoting techniques, which can contribute to improved retrieval quality by providing varied and contextually rich outputs. \n",
- "\n",
- "By focusing on these strategies, RAG systems can achieve better retrieval quality, leading to more accurate and contextually relevant outputs.\n",
- "\n",
- "=== Factual Consistency Score: 0.33203125 ===\n"
+ "To "
]
- }
- ],
- "source": [
- "# Custom prompt template using Velocity syntax\n",
- "# This example creates a structured response with sections and bullet points\n",
- "custom_prompt_template = [\n",
- " {\n",
- " \"role\": \"system\",\n",
- " \"content\": \"\"\"You are a helpful AI assistant that provides clear, well-structured answers. \n",
- "Your responses should be formatted with the following sections:\n",
- "1. A brief summary (2-3 sentences)\n",
- "2. Key points as bullet points\n",
- "\n",
- "Use the search results provided to ground your answer in facts.\"\"\"\n",
- " },\n",
- " {\n",
- " \"role\": \"user\", \n",
- " \"content\": \"\"\"Question: $vectaraQuery\n",
- "\n",
- "Retrieved Context:\n",
- "#foreach ($qResult in $vectaraQueryResults)\n",
- "- $qResult.text()\n",
- "#end\n",
- "\n",
- "Please provide a well-structured answer to the question above using the retrieved context. Format your response with clear sections as instructed.\"\"\"\n",
- " }\n",
- "]\n",
- "\n",
- "# Query request with custom prompt\n",
- "custom_prompt_request = {\n",
- " \"query\": \"How can I improve retrieval quality in RAG systems?\",\n",
- " \"search\": {\n",
- " \"corpora\": [\n",
- " {\n",
- " \"corpus_key\": research_corpus_key,\n",
- " \"lexical_interpolation\": 0.005\n",
- " }\n",
- " ],\n",
- " \"limit\": 100,\n",
- " \"context_configuration\": {\n",
- " \"sentences_before\": 2,\n",
- " \"sentences_after\": 2\n",
- " },\n",
- " \"reranker\": {\n",
- " \"type\": \"chain\",\n",
- " \"rerankers\": [\n",
- " {\n",
- " \"type\": \"customer_reranker\",\n",
- " \"reranker_name\": \"qwen3-reranker\",\n",
- " \"limit\": 30\n",
- " },\n",
- " {\n",
- " \"type\": \"mmr\",\n",
- " \"diversity_bias\": 0.05\n",
- " }\n",
- " ],\n",
- " }\n",
- " },\n",
- " \"generation\": {\n",
- " \"prompt_template\": json.dumps(custom_prompt_template),\n",
- " \"generation_preset_name\": \"vectara-summary-ext-24-05-med-omni\",\n",
- " \"max_used_search_results\": 10,\n",
- " \"response_language\": \"eng\",\n",
- " \"enable_factual_consistency_score\": True\n",
- " },\n",
- " \"save_history\": True\n",
- "}\n",
- "\n",
- "response = requests.post(f\"{BASE_URL}/query\", headers=headers, json=custom_prompt_request)\n",
- "\n",
- "if response.status_code == 200:\n",
- " result = response.json()\n",
- " print(\"\\n=== Generated Response with Custom Prompt ===\")\n",
- " print(result['summary'])\n",
- " print(f\"\\n=== Factual Consistency Score: {result.get('factual_consistency_score', 'N/A')} ===\")\n",
- "else:\n",
- " print(f\"Error: {response.status_code}\")\n",
- " print(response.text)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "w354a6255rs",
- "metadata": {},
- "source": [
- "### Advanced Custom Prompt: Accessing Metadata\n",
- "\n",
- "You can also access document and part metadata within your custom prompts using:\n",
- "- `$qResult.docMetadata()` - Access document-level metadata\n",
- "- `$qResult.partMetadata()` - Access chunk-level metadata\n",
- "- `.get('fieldname')` - Retrieve specific metadata fields\n",
- "\n",
- "This example shows how to create a prompt that includes citation information with metadata."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "mbsnuv6fnfe",
- "metadata": {},
- "outputs": [
+ },
{
"name": "stdout",
"output_type": "stream",
"text": [
+ "use c"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "hun"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "kin"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "g"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " wit"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "h Vec"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tar"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "a, you nee"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "d to"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " confi"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gur"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "e the chu"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nking s"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "trat"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "egy durin"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "g the docu"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ment ing"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "e"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "st"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ion"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " pr"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ocess. Ve"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ctar"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "a suppor"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ts two"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " pr"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "imary chunk"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "i"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ng strate"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gies: "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sent"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ence"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "-ba"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sed"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " and m"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "a"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "x-ch"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ars-based"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ". The "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sen"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tence chu"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nking st"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rate"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gy cr"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "eate"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "s one par"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "t pe"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "r s"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ente"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nce and "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "is the d"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "efa"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ult"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " setting "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "if no spe"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "cif"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ic s"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "t"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rate"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gy i"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "s s"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "et."
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " The m"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ax-"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "chars chu"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nking "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "strate"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gy accumul"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ates "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "se"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ntenc"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "es int"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "o "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "a part unt"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "il a speci"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "fied c"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "har"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "acter li"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "m"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "it "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "is"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " reache"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "d. If a s"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ingle se"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ntenc"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "e exce"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "e"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ds "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "this "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "lim"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "it, it"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " will b"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "e spli"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "t"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " acr"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "oss "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "part"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "s. Y"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ou can"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " se"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "t the chu"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nking "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "stra"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "te"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gy us"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ing"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " the `chu"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "king_s"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tra"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tegy`"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " fie"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ld in yo"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "u"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "r API r"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "equest,"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " eith"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "er duri"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ng "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "file uplo"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ad or doc"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "u"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ment"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " indexing. "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "For struct"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "u"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "red d"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ocuments"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ", e"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ach se"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ct"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ion is chunked"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "independent"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ly, prese"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rving sema"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nt"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ic i"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nte"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "g"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rity"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1],"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2], [5]."
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "=== FCS: 0.9921875 ===\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Streaming query request - query the documentation corpus\n",
+ "streaming_request = {\n",
+ " \"query\": \"How do I use chunking with Vectara\",\n",
+ " \"stream_response\": True,\n",
+ " \"search\": {\n",
+ " \"corpora\": [\n",
+ " {\n",
+ " \"corpus_key\": docs_corpus_key,\n",
+ " \"lexical_interpolation\": 0.005\n",
+ " }\n",
+ " ],\n",
+ " \"limit\": 100,\n",
+ " \"context_configuration\": {\n",
+ " \"sentences_before\": 2,\n",
+ " \"sentences_after\": 2\n",
+ " },\n",
+ " \"reranker\": {\n",
+ " \"type\": \"chain\",\n",
+ " \"rerankers\": [\n",
+ " {\n",
+ " \"type\": \"customer_reranker\",\n",
+ " \"reranker_name\": \"qwen3-reranker\",\n",
+ " \"limit\": 30\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"mmr\",\n",
+ " \"diversity_bias\": 0.05\n",
+ " }\n",
+ " ],\n",
+ " }\n",
+ " },\n",
+ " \"generation\": {\n",
+ " \"generation_preset_name\": \"vectara-summary-ext-24-05-med-omni\",\n",
+ " \"max_used_search_results\": 15,\n",
+ " \"response_language\": \"eng\",\n",
+ " \"enable_factual_consistency_score\": True\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "# Make streaming request\n",
+ "streaming_headers = headers.copy()\n",
+ "streaming_headers['Accept'] = 'text/event-stream'\n",
+ "\n",
+ "response = requests.post(\n",
+ " f\"{BASE_URL}/query\",\n",
+ " headers=streaming_headers,\n",
+ " json=streaming_request,\n",
+ " stream=True\n",
+ ")\n",
+ "\n",
+ "print(\"\\n=== Streaming Response ===\")\n",
+ "if response.status_code == 200:\n",
+ " for line in response.iter_lines():\n",
+ " if line:\n",
+ " line_str = line.decode('utf-8')\n",
+ " if line_str.startswith('data:'):\n",
+ " try:\n",
+ " data = json.loads(line_str[5:]) # Strip the 'data:' prefix; json.loads ignores the leading space\n",
+ " # Handle different event types\n",
+ " if data.get('type') == 'generation_chunk':\n",
+ " # Print generation text as it arrives\n",
+ " print(data.get('generation_chunk', ''), end='', flush=True)\n",
+ " elif data.get('type') == 'factual_consistency_score':\n",
+ " print(f\"\\n\\n=== FCS: {data.get('factual_consistency_score')} ===\")\n",
+ " elif data.get('type') == 'search_results':\n",
+ " # Search results arrive before generation starts\n",
+ " pass\n",
+ " except json.JSONDecodeError:\n",
+ " pass\n",
+ " print(\"\\n\")\n",
+ "else:\n",
+ " print(f\"Error: {response.status_code}\")\n",
+ " print(response.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "t98ft4j3tmg",
+ "metadata": {},
+ "source": [
+ "## Example 5: Custom Prompts with prompt_template\n",
+ "\n",
+ "Vectara's Prompt Engine allows you to customize the LLM behavior beyond the default presets by using the `prompt_template` parameter. You can use Velocity Template syntax to access variables like:\n",
+ "- `$vectaraQuery` - The user's query text\n",
+ "- `$vectaraQueryResults` - Array of retrieved search results\n",
+ "- `$vectaraLangCode` - ISO 639 language code (e.g., \"eng\")\n",
+ "- `$vectaraOutChars` - Character limit for output\n",
+ "\n",
+ "This example demonstrates how to create a custom prompt that formats responses in a specific way."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5wuzfuoidbb",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:40.704081Z",
+ "iopub.status.busy": "2026-05-28T22:14:40.703416Z",
+ "iopub.status.idle": "2026-05-28T22:14:47.557680Z",
+ "shell.execute_reply": "2026-05-28T22:14:47.556105Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== Generated Response with Custom Prompt ===\n",
+ "**Summary:**\n",
+ "Improving retrieval quality in Retrieval-Augmented Generation (RAG) systems involves optimizing the retriever component to ensure it effectively retrieves relevant information for the task at hand. Key strategies include using learned retrieval mechanisms, adjusting the number of retrieved documents, and leveraging the flexibility of non-parametric memory models.\n",
+ "\n",
+ "**Key Points:**\n",
+ "\n",
+ "- **Learned Retrieval:**\n",
+ " - RAG systems benefit from learned retrieval mechanisms, which improve results across various tasks compared to fixed retrieval systems like BM25.\n",
+ " - The retriever in RAG is initialized using DPR's retriever, which is trained with retrieval supervision on datasets like Natural Questions and TriviaQA.\n",
+ "\n",
+ "- **Comparison with BM25:**\n",
+ " - While BM25, a word overlap-based retriever, performs well for certain tasks like FEVER due to its entity-centric nature, learned retrieval generally provides better results for open-domain QA tasks.\n",
+ "\n",
+ "- **Document Retrieval Flexibility:**\n",
+ " - Adjusting the number of documents retrieved at test time can impact performance. For instance, retrieving more documents generally improves results for RAG-Sequence, but there is an optimal number for RAG-Token.\n",
+ "\n",
+ "- **Non-Parametric Memory Advantages:**\n",
+ " - RAG's non-parametric memory allows for easy updates to the retrieval index without retraining, providing flexibility and adaptability to new information.\n",
+ "\n",
+ "- **End-to-End Training:**\n",
+ " - Training the retriever and generator components of RAG end-to-end, treating retrieved documents as latent variables, can enhance the system's ability to generate accurate and contextually relevant responses.\n",
+ "\n",
+ "- **Future Directions:**\n",
+ " - Exploring joint pre-training of the retriever and generator components could further improve retrieval quality and overall system performance.\n",
+ "\n",
+ "By focusing on these strategies, RAG systems can achieve more accurate and contextually relevant retrieval, enhancing their performance in knowledge-intensive NLP tasks.\n",
+ "\n",
+ "=== Factual Consistency Score: 0.4296875 ===\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Custom prompt template using Velocity syntax\n",
+ "# This example creates a structured response with sections and bullet points\n",
+ "custom_prompt_template = [\n",
+ " {\n",
+ " \"role\": \"system\",\n",
+ " \"content\": \"\"\"You are a helpful AI assistant that provides clear, well-structured answers. \n",
+ "Your responses should be formatted with the following sections:\n",
+ "1. A brief summary (2-3 sentences)\n",
+ "2. Key points as bullet points\n",
+ "\n",
+ "Use the search results provided to ground your answer in facts.\"\"\"\n",
+ " },\n",
+ " {\n",
+ " \"role\": \"user\", \n",
+ " \"content\": \"\"\"Question: $vectaraQuery\n",
+ "\n",
+ "Retrieved Context:\n",
+ "#foreach ($qResult in $vectaraQueryResults)\n",
+ "- $qResult.text()\n",
+ "#end\n",
+ "\n",
+ "Please provide a well-structured answer to the question above using the retrieved context. Format your response with clear sections as instructed.\"\"\"\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Query request with custom prompt\n",
+ "custom_prompt_request = {\n",
+ " \"query\": \"How can I improve retrieval quality in RAG systems?\",\n",
+ " \"search\": {\n",
+ " \"corpora\": [\n",
+ " {\n",
+ " \"corpus_key\": research_corpus_key,\n",
+ " \"lexical_interpolation\": 0.005\n",
+ " }\n",
+ " ],\n",
+ " \"limit\": 100,\n",
+ " \"context_configuration\": {\n",
+ " \"sentences_before\": 2,\n",
+ " \"sentences_after\": 2\n",
+ " },\n",
+ " \"reranker\": {\n",
+ " \"type\": \"chain\",\n",
+ " \"rerankers\": [\n",
+ " {\n",
+ " \"type\": \"customer_reranker\",\n",
+ " \"reranker_name\": \"qwen3-reranker\",\n",
+ " \"limit\": 30\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"mmr\",\n",
+ " \"diversity_bias\": 0.05\n",
+ " }\n",
+ " ],\n",
+ " }\n",
+ " },\n",
+ " \"generation\": {\n",
+ " \"prompt_template\": json.dumps(custom_prompt_template),\n",
+ " \"generation_preset_name\": \"vectara-summary-ext-24-05-med-omni\",\n",
+ " \"max_used_search_results\": 10,\n",
+ " \"response_language\": \"eng\",\n",
+ " \"enable_factual_consistency_score\": True\n",
+ " },\n",
+ " \"save_history\": True\n",
+ "}\n",
+ "\n",
+ "response = requests.post(f\"{BASE_URL}/query\", headers=headers, json=custom_prompt_request)\n",
+ "\n",
+ "if response.status_code == 200:\n",
+ " result = response.json()\n",
+ " print(\"\\n=== Generated Response with Custom Prompt ===\")\n",
+ " print(result['summary'])\n",
+ " print(f\"\\n=== Factual Consistency Score: {result.get('factual_consistency_score', 'N/A')} ===\")\n",
+ "else:\n",
+ " print(f\"Error: {response.status_code}\")\n",
+ " print(response.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "w354a6255rs",
+ "metadata": {},
+ "source": [
+ "### Advanced Custom Prompt: Accessing Metadata\n",
+ "\n",
+ "You can also access document and part metadata within your custom prompts using:\n",
+ "- `$qResult.docMetadata()` - Access document-level metadata\n",
+ "- `$qResult.partMetadata()` - Access chunk-level metadata\n",
+ "- `.get('fieldname')` - Retrieve specific metadata fields\n",
+ "\n",
+ "This example shows how to create a prompt that includes citation information with metadata."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "mbsnuv6fnfe",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:47.562491Z",
+ "iopub.status.busy": "2026-05-28T22:14:47.561901Z",
+ "iopub.status.idle": "2026-05-28T22:14:54.765870Z",
+ "shell.execute_reply": "2026-05-28T22:14:54.764142Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== Response with Metadata-Rich Custom Prompt ===\n",
+ "Retrieval-Augmented Generation (RAG) systems, while offering significant advantages in knowledge-intensive NLP tasks, face several challenges as highlighted in the research papers.\n",
"\n",
- "=== Response with Metadata-Rich Custom Prompt ===\n",
- "Retrieval-Augmented Generation (RAG) systems, while offering significant advancements in knowledge-intensive NLP tasks, face several challenges as highlighted in the research papers.\n",
- "\n",
- "1. **Factuality and Bias**: One of the primary challenges with RAG systems is the reliance on external knowledge sources like Wikipedia, which may not always be entirely factual or free from bias. This can lead to the generation of misleading or biased content. The paper \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\" by the authors notes that similar concerns apply to RAG as to other language models like GPT-2, including the potential for generating abusive, fake, or misleading content, impersonating others, or automating spam/phishing content (Title: \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\").\n",
- "\n",
- "2. **Provenance and Knowledge Updating**: Another challenge is providing provenance for the decisions made by RAG systems and updating their world knowledge. The paper mentions that these remain open research problems, as pre-trained models with differentiable access to explicit non-parametric memory have primarily been explored for extractive tasks, not generative ones (Title: \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\").\n",
+ "1. **Factuality and Bias**: One of the primary challenges with RAG systems is the reliance on external knowledge sources like Wikipedia, which may not always be entirely factual or free from bias. This can lead to the generation of misleading or biased content. The paper \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\" notes that similar concerns apply to RAG as to other language models like GPT-2, including the potential for generating abusive, fake, or misleading content, impersonating others, or automating spam/phishing content (Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks).\n",
"\n",
- "3. **Retrieval Collapse**: The phenomenon of \"retrieval collapse\" is a significant issue where the retrieval component of the RAG system learns to retrieve the same documents regardless of the input. This results in the generator ignoring the documents, causing the RAG model to perform similarly to simpler models like BART. This collapse might occur due to a lack of explicit requirement for factual knowledge in certain tasks or due to longer target sequences that provide less informative gradients for the retriever (Title: \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\").\n",
+ "2. **Provenance and Knowledge Updating**: Another challenge is providing provenance for the decisions made by RAG systems and updating their world knowledge. The paper highlights that these remain open research problems, as pre-trained models with differentiable access to explicit non-parametric memory have primarily been explored for extractive tasks, not generative ones (Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks).\n",
"\n",
- "4. **Decoding Complexity**: The RAG-Sequence and RAG-Token models require different decoding strategies, which can complicate the generation process. For instance, RAG-Sequence does not allow for a conventional per-token likelihood, necessitating separate beam searches for each document, which can be computationally intensive and complex (Title: \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\").\n",
+ "3. **Retrieval Collapse**: The phenomenon of \"retrieval collapse\" is a significant issue where the retrieval component of the RAG system learns to retrieve the same documents regardless of the input. This can lead to the generator ignoring the retrieved documents, causing the RAG model to perform similarly to a standard model like BART, without the benefits of retrieval augmentation. This collapse may occur due to a lack of explicit requirement for factual knowledge in certain tasks or due to longer target sequences that provide less informative gradients for the retriever (Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks).\n",
"\n",
- "5. **Hallucination and Control**: Although RAG models tend to hallucinate less compared to other models like BART, the challenge of ensuring factual correctness and control over the generated content remains. The paper highlights that while RAG models are more grounded in factual knowledge, they are not immune to generating incorrect or less factual content (Title: \"Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\").\n",
+ "4. **Decoding Complexity**: The RAG-Sequence and RAG-Token models require different approaches to approximate the maximum likelihood during decoding. The complexity arises because RAG-Sequence does not break into a conventional per-token likelihood, necessitating separate beam searches for each document, which complicates the decoding process (Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks).\n",
"\n",
- "These challenges highlight the need for ongoing research and development to improve the reliability, factual accuracy, and efficiency of RAG systems in various applications.\n",
+ "These challenges highlight the need for ongoing research and development to address the limitations of RAG systems, ensuring they can be effectively and safely deployed in various applications.\n",
"\n",
- "=== Factual Consistency Score: 0.31445312 ===\n"
+ "=== Factual Consistency Score: 0.796875 ===\n"
]
}
],
diff --git a/notebooks/api-examples/5-agent-api.ipynb b/notebooks/api-examples/5-agent-api.ipynb
new file mode 100644
index 0000000..05c6710
--- /dev/null
+++ b/notebooks/api-examples/5-agent-api.ipynb
@@ -0,0 +1,1489 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Vectara Agent API Examples\n",
+ "\n",
+ "This notebook demonstrates the basics of how to use Vectara's Agent REST APIs directly to create and interact with AI agents.\n",
+ "\n",
+ "You'll learn how to:\n",
+ "1. Create an agent with custom instructions\n",
+ "2. Create agent sessions for conversations\n",
+ "3. Send messages to agents and get responses\n",
+ "4. Use streaming for real-time responses\n",
+ "5. Manage conversation history\n",
+ "6. Work with tools and tool servers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## About Vectara\n",
+ "\n",
+ "[Vectara](https://vectara.com/) is an Agent Platform for trusted enterprise AI — a unified Agentic RAG platform with built-in retrieval, orchestration, and governance. See [Notebook 1](1-corpus-creation.ipynb) for the full overview of features and deployment options."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Getting Started\n",
+ "\n",
+ "This notebook assumes you've completed Notebooks 1-2, and optionally Notebooks 3-4:\n",
+ "- Notebook 1: Created two corpora (ai-research-papers and vectara-docs)\n",
+ "- Notebook 2: Ingested AI research papers and Vectara documentation\n",
+ "- Notebook 3: Deleted documents from a corpus\n",
+ "- Notebook 4: Queried the data with various techniques\n",
+ "\n",
+ "Now we'll create agents that can autonomously search and reason across this data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:57.298254Z",
+ "iopub.status.busy": "2026-05-28T22:14:57.297536Z",
+ "iopub.status.idle": "2026-05-28T22:14:59.217793Z",
+ "shell.execute_reply": "2026-05-28T22:14:59.217445Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\r\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m26.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1.1\u001b[0m\r\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -q sseclient-py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:59.219374Z",
+ "iopub.status.busy": "2026-05-28T22:14:59.219285Z",
+ "iopub.status.idle": "2026-05-28T22:14:59.262623Z",
+ "shell.execute_reply": "2026-05-28T22:14:59.262404Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "import json\n",
+ "import uuid\n",
+ "from datetime import datetime\n",
+ "\n",
+ "# Get credentials from environment variables\n",
+ "api_key = os.environ['VECTARA_API_KEY']\n",
+ "\n",
+ "# Corpus keys created in Notebook 1 (hard-coded here, not read from the environment)\n",
+ "research_corpus_key = 'tutorial-ai-research-papers'\n",
+ "docs_corpus_key = 'tutorial-vectara-docs'\n",
+ "\n",
+ "# Base API URL\n",
+ "BASE_URL = \"https://api.vectara.io/v2\"\n",
+ "\n",
+ "# Common headers\n",
+ "headers = {\n",
+ " \"x-api-key\": api_key,\n",
+ " \"Content-Type\": \"application/json\"\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 1: Create a Basic Agent\n",
+ "\n",
+ "Create an agent with custom instructions that can search your corpus:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:14:59.264034Z",
+ "iopub.status.busy": "2026-05-28T22:14:59.263914Z",
+ "iopub.status.idle": "2026-05-28T22:15:01.563618Z",
+ "shell.execute_reply": "2026-05-28T22:15:01.562892Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Checking if agent 'RAG Research Assistant' already exists...\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "✓ Agent already exists!\n",
+ " Agent Key: agt_rag_research_assistant_4627\n",
+ " Agent Name: RAG Research Assistant\n",
+ " Steps: ['first_step']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Define agent configuration - this agent can access both corpora\n",
+ "# Agent structure uses steps with instructions and tool configurations\n",
+ "agent_name = \"RAG Research Assistant\"\n",
+ "agent_config = {\n",
+ " \"name\": agent_name,\n",
+ " \"description\": \"Agent that can answer questions about RAG, embeddings, and retrieval from both research papers and documentation\",\n",
+ " \"model\": { \"name\": \"gpt-4o\" },\n",
+ " \"first_step_name\": \"main\",\n",
+ " \"steps\": {\n",
+ " \"main\": {\n",
+ " \"instructions\": [\n",
+ " {\n",
+ " \"type\": \"inline\",\n",
+ " \"name\": \"first set of instructions\",\n",
+ " \"template\": \"\"\"\n",
+ "You are an expert AI research assistant specializing in Retrieval Augmented Generation and AI Agents. \n",
+ "You have access to both academic research papers and Vectara's technical documentation. \n",
+ "Provide clear, accurate answers with citations. \n",
+ "When answering, combine theoretical insights from research with practical implementation guidance from documentation.\n",
+ " \"\"\"\n",
+ " }\n",
+ " ],\n",
+ " \"output_parser\": {\"type\": \"default\"}\n",
+ " }\n",
+ " },\n",
+ " \n",
+ " \"tool_configurations\": {\n",
+ " \"research_paper_search\": {\n",
+ " \"type\": \"corpora_search\",\n",
+ " \"query_configuration\": {\n",
+ " \"search\": {\n",
+ " \"corpora\": [\n",
+ " {\n",
+ " \"corpus_key\": research_corpus_key\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " \"generation\": {\n",
+ " \"generation_preset_name\": \"vectara-summary-table-md-query-ext-jan-2025-gpt-4o\",\n",
+ " \"model_parameters\": {\n",
+ " \"llm_name\": \"gpt-4o\",\n",
+ " \"temperature\": 0.0\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " },\n",
+ " \"vectara_doc_search\": {\n",
+ " \"type\": \"corpora_search\",\n",
+ " \"query_configuration\": {\n",
+ " \"search\": {\n",
+ " \"corpora\": [\n",
+ " {\n",
+ " \"corpus_key\": docs_corpus_key\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " \"generation\": {\n",
+ " \"generation_preset_name\": \"vectara-summary-table-md-query-ext-jan-2025-gpt-4o\",\n",
+ " \"model_parameters\": {\n",
+ " \"llm_name\": \"gpt-4o\",\n",
+ " \"temperature\": 0.0\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "# Inlined here to show the pagination pattern explicitly; notebooks 6+ import\n",
+ "# the same helper as vectara_utils.find_agents_by_name.\n",
+ "def find_agent_by_name(name):\n",
+ " \"\"\"Find an agent by name, handling pagination.\"\"\"\n",
+ " page_key = None\n",
+ " while True:\n",
+ " params = {'limit': 100}\n",
+ " if page_key:\n",
+ " params['page_key'] = page_key\n",
+ " resp = requests.get(f\"{BASE_URL}/agents\", headers=headers, params=params)\n",
+ " if resp.status_code != 200:\n",
+ " break\n",
+ " data = resp.json()\n",
+ " for agent in data.get('agents', []):\n",
+ " if agent.get('name') == name:\n",
+ " return agent\n",
+ " page_key = data.get('metadata', {}).get('page_key')\n",
+ " if not page_key:\n",
+ " break\n",
+ " return None\n",
+ "\n",
+ "# Check if agent already exists\n",
+ "print(f\"Checking if agent '{agent_name}' already exists...\")\n",
+ "agent_key = None\n",
+ "existing = find_agent_by_name(agent_name)\n",
+ "if existing:\n",
+ " agent_key = existing['key']\n",
+ " print(f\"✓ Agent already exists!\")\n",
+ " print(f\" Agent Key: {agent_key}\")\n",
+ " print(f\" Agent Name: {existing['name']}\")\n",
+ " print(f\" Steps: {list(existing.get('steps', {}).keys())}\")\n",
+ "\n",
+ "# Create the agent only if it doesn't exist\n",
+ "if not agent_key:\n",
+ " print(f\"Creating new agent '{agent_name}'...\")\n",
+ " response = requests.post(f\"{BASE_URL}/agents\", headers=headers, json=agent_config)\n",
+ " \n",
+ " print(f\"Status Code: {response.status_code}\")\n",
+ " if response.status_code == 201:\n",
+ " agent_data = response.json()\n",
+ " agent_key = agent_data[\"key\"]\n",
+ " print(f\"✓ Agent Created!\")\n",
+ " print(f\" Agent Key: {agent_key}\")\n",
+ " print(f\" Agent Name: {agent_data['name']}\")\n",
+ " print(f\" Steps: {list(agent_data.get('steps', {}).keys())}\")\n",
+ " elif response.status_code == 409:\n",
+ " # Agent was created between our check and create call; look it up again\n",
+ " existing = find_agent_by_name(agent_name)\n",
+ " if existing:\n",
+ " agent_key = existing['key']\n",
+ " print(f\"✓ Agent already exists!\")\n",
+ " print(f\" Agent Key: {agent_key}\")\n",
+ " else:\n",
+ " print(f\"Error: Agent exists per API but could not be found in listing\")\n",
+ " else:\n",
+ " print(f\"Error: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 2: Create an Agent Session\n",
+ "\n",
+ "Sessions maintain conversation context across multiple turns:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:01.566278Z",
+ "iopub.status.busy": "2026-05-28T22:15:01.566007Z",
+ "iopub.status.idle": "2026-05-28T22:15:01.747458Z",
+ "shell.execute_reply": "2026-05-28T22:15:01.746651Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Status Code: 201\n",
+ "✓ Session Created!\n",
+ " Session Name: Technical Support Chat 20260528-151501\n",
+ " Session Key: ase_technical_support_chat_20260528-151501_655e\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Create a new session with a unique name to allow reruns\n",
+ "session_name = f\"Technical Support Chat {datetime.now().strftime('%Y%m%d-%H%M%S')}\"\n",
+ "session_config = {\n",
+ " \"name\": session_name,\n",
+ " \"metadata\": {\n",
+ " \"user_type\": \"developer\",\n",
+ " \"session_purpose\": \"api_questions\"\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "url = f\"{BASE_URL}/agents/{agent_key}/sessions\"\n",
+ "response = requests.post(url, headers=headers, json=session_config)\n",
+ "\n",
+ "print(f\"Status Code: {response.status_code}\")\n",
+ "if response.status_code == 201:\n",
+ " session_data = response.json()\n",
+ " session_key = session_data[\"key\"]\n",
+ " print(f\"✓ Session Created!\")\n",
+ " print(f\" Session Name: {session_name}\")\n",
+ " print(f\" Session Key: {session_key}\")\n",
+ "else:\n",
+ " print(f\"Error: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 3: Send Messages to the Agent\n",
+ "\n",
+ "Send a message and get a response:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:01.752637Z",
+ "iopub.status.busy": "2026-05-28T22:15:01.752165Z",
+ "iopub.status.idle": "2026-05-28T22:15:12.527275Z",
+ "shell.execute_reply": "2026-05-28T22:15:12.526317Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Agent Response:\n",
+ "Retrieval Augmented Generation (RAG) is a method that combines pre-trained language models with external retrieval capabilities, allowing these models to access both parametric and non-parametric memory. This approach is particularly suited for knowledge-intensive tasks as it involves retrieving relevant documents or data that can enhance the outputs of language generation models by grounding their responses in retrieved facts [research_paper_1].\n",
+ "\n",
+ "To implement RAG using Vectara, you can take advantage of its flexible prompt customization features. Vectara enables the integration of retrieved documents and their metadata into the generation prompts, improving the relevance and accuracy of the generated responses. The platform provides a Custom RAG Prompt Engine that allows developers to craft prompts leveraging diverse prompt variables and functions, adaptable for various applications such as enterprise search, chatbots, or knowledge bases [vectara_doc_1], [vectara_doc_4].\n",
+ "\n",
+ "For practical implementation:\n",
+ "1. Customize prompt templates to include the retrieved information [vectara_doc_1].\n",
+ "2. Use Vectara's Query APIs, which support RAG by allowing you to query your data and generate summaries [vectara_doc_3].\n",
+ "3. Index your documents into corpora and perform semantic searches to generate contextually grounded answers or summaries [vectara_doc_2].\n",
+ "\n",
+ "This system handles prompt management automatically, but it also offers developers the flexibility to introduce customizations as needed [vectara_doc_4], [vectara_doc_5].\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Send a message to the agent\n",
+ "# The correct format uses a messages array with message objects\n",
+ "message_data = {\n",
+ " \"messages\": [\n",
+ " {\n",
+ " \"type\": \"text\",\n",
+ " \"content\": \"What is retrieval augmented generation and how can I implement it with Vectara?\"\n",
+ " }\n",
+ " ],\n",
+ " \"stream_response\": False\n",
+ "}\n",
+ "\n",
+ "url = f\"{BASE_URL}/agents/{agent_key}/sessions/{session_key}/events\"\n",
+ "response = requests.post(url, headers=headers, json=message_data)\n",
+ "\n",
+ "if response.status_code == 201:\n",
+ " event_data = response.json()\n",
+ " print(f\"\\nAgent Response:\")\n",
+ " # The response typically contains the assistant's message in the events\n",
+ " if 'events' in event_data:\n",
+ " for event in event_data['events']:\n",
+ " if event.get('type') == 'agent_output':\n",
+ " print(event.get('content', 'No content'))\n",
+ " else:\n",
+ " print(event_data)\n",
+ "else:\n",
+ " print(f\"Error: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4: Multi-Turn Conversation\n",
+ "\n",
+ "The agent maintains conversation context automatically:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:12.531460Z",
+ "iopub.status.busy": "2026-05-28T22:15:12.530938Z",
+ "iopub.status.idle": "2026-05-28T22:15:21.076016Z",
+ "shell.execute_reply": "2026-05-28T22:15:21.075089Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "User: What is hybrid search?\n",
+ "\n",
+ "Agent Response:\n",
+ "Hybrid search is an approach that combines the capabilities of both semantic search and keyword-based search. Within the context of Vectara, a hybrid search involves using a semantic index that is both writable and filterable, allowing for efficient retrieval of multilingual data. This type of search can handle structured storage with metadata like user_id, session_id, topic, and indexed_at, blending elements from memory, knowledge, and cache in multi-corpus scenarios. Vectara’s platform supports this setup with built-in chain rerankers, citation capabilities, and role-based access control (RBAC), ensuring users access only the information they are authorized to see [vectara_doc_1], [vectara_doc_2].\n",
+ "\n",
+ "================================================================================\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+    "# First message: opens the conversation (non-streaming, so the full reply arrives in one response)\n",
+    "message_1 = {\n",
+    "    \"messages\": [\n",
+    "        {\n",
+    "            \"type\": \"text\",\n",
+    "            \"content\": \"What is hybrid search?\"\n",
+    "        }\n",
+    "    ],\n",
+    "    \"stream_response\": False\n",
+    "}\n",
+    "\n",
+    "url = f\"{BASE_URL}/agents/{agent_key}/sessions/{session_key}/events\"\n",
+    "response = requests.post(url, headers=headers, json=message_1)\n",
+    "\n",
+    "print(\"User: What is hybrid search?\")\n",
+    "\n",
+    "if response.status_code == 201:  # 201 Created: the event was accepted\n",
+    "    event_data = response.json()\n",
+    "    print(\"\\nAgent Response:\")\n",
+    "    if 'events' in event_data:\n",
+    "        for event in event_data['events']:\n",
+    "            if event.get('type') == 'agent_output':\n",
+    "                print(event.get('content', 'No content'))\n",
+    "    else:\n",
+    "        print(event_data)\n",
+    "else:\n",
+    "    print(f\"Error {response.status_code}: {response.text}\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*80 + \"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:21.080112Z",
+ "iopub.status.busy": "2026-05-28T22:15:21.079781Z",
+ "iopub.status.idle": "2026-05-28T22:15:25.248537Z",
+ "shell.execute_reply": "2026-05-28T22:15:25.246635Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "User: What are its main benefits?\n",
+ "\n",
+ "Agent Response:\n",
+ "Hybrid search, particularly as implemented by platforms like Vectara, offers several benefits:\n",
+ "\n",
+ "1. **Comprehensive Search Coverage**: By combining semantic search with keyword-based search, hybrid search ensures a more thorough and nuanced approach to finding information, capturing both the context and specific keywords in a query [vectara_doc_2].\n",
+ "\n",
+ "2. **Enhanced Precision and Relevance**: The use of semantic understanding allows hybrid search to consider the meaning and context behind words, improving the relevance of search results compared to traditional keyword-only searches [vectara_doc_1].\n",
+ "\n",
+ "3. **Structured Data Retrieval**: Hybrid search supports the use of metadata and structured filters (such as user_id, topic, and session_id) that enhance the ability to retrieve precise and contextually relevant information, enabling more targeted and efficient searches [vectara_doc_1].\n",
+ "\n",
+ "4. **Multilingual Capabilities**: The inclusion of semantic indexing makes hybrid searches more adept at handling multiple languages, benefiting global applications [vectara_doc_1].\n",
+ "\n",
+ "5. **Access Control and Security**: With features like RBAC (role-based access control), hybrid search ensures that users access only the chunks of data they are permitted to see, enhancing security and compliance [vectara_doc_1].\n",
+ "\n",
+ "6. **Improved User Experience**: The integration of chain rerankers and citation capabilities helps refine and rank the search results, leading to a better user experience by presenting the most relevant information first [vectara_doc_1].\n",
+ "\n",
+ "By leveraging these benefits, hybrid search vastly improves the effectiveness and reliability of information retrieval systems, making them suitable for diverse applications from enterprise search to AI-driven analytics.\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Follow-up message: reuses `url` from the previous cell, so it goes to the same session (agent remembers context)\n",
+    "message_2 = {\n",
+    "    \"messages\": [\n",
+    "        {\n",
+    "            \"type\": \"text\",\n",
+    "            \"content\": \"What are its main benefits?\"\n",
+    "        }\n",
+    "    ],\n",
+    "    \"stream_response\": False\n",
+    "}\n",
+    "\n",
+    "response = requests.post(url, headers=headers, json=message_2)\n",
+    "\n",
+    "print(\"User: What are its main benefits?\")\n",
+    "if response.status_code == 201:  # 201 Created: the event was accepted\n",
+    "    event_data = response.json()\n",
+    "    print(\"\\nAgent Response:\")\n",
+    "    if 'events' in event_data:\n",
+    "        for event in event_data['events']:\n",
+    "            if event.get('type') == 'agent_output':\n",
+    "                print(event.get('content', 'No content'))\n",
+    "    else:\n",
+    "        print(event_data)\n",
+    "else:\n",
+    "    print(f\"Error {response.status_code}: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:25.253596Z",
+ "iopub.status.busy": "2026-05-28T22:15:25.253151Z",
+ "iopub.status.idle": "2026-05-28T22:15:29.970133Z",
+ "shell.execute_reply": "2026-05-28T22:15:29.969619Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "User: Can you give me an example?\n",
+ "\n",
+ "Agent Response:\n",
+ "Certainly! Let's consider an example of how hybrid search could be used in a real-world scenario:\n",
+ "\n",
+ "**Scenario: Customer Support System**\n",
+ "\n",
+ "Imagine a large company that provides customer support across multiple countries. Their support system needs to handle a variety of customer queries, which can be highly specific or very general. The company has a comprehensive database of support documents, past ticket resolutions, product manuals, and FAQs.\n",
+ "\n",
+ "**Hybrid Search Benefits:**\n",
+ "\n",
+ "1. **Semantic Understanding**: When a customer inputs a query like, \"I can't connect my printer to Wi-Fi,\" hybrid search uses semantic understanding to relate this to similar issues, broader connectivity problems, and specific printer models mentioned across documents.\n",
+ "\n",
+ "2. **Keyword Matching**: At the same time, the system utilizes keyword-based matching to surface documents specifically mentioning \"printer\" and \"Wi-Fi,\" ensuring that the relevant details are prioritized.\n",
+ "\n",
+ "3. **Multilingual Support**: If the customer queries in another language, the semantic layer understands and translates the intent, searching relevant documents in various languages.\n",
+ "\n",
+ "4. **Meta-data Filtering**: The system applies filters based on customer tags like region or known product ownership, allowing personalized resolution suggestions.\n",
+ "\n",
+ "5. **Role-Based Access**: Internally, customer support agents might see more detailed diagnostic documents that are not available to consumers, thanks to RBAC features.\n",
+ "\n",
+ "6. **Efficient Retrieval and Citing**: The customer receives a response that not only links to the most relevant document but also cites sections of previous successful solutions as examples, enhancing trust and satisfaction.\n",
+ "\n",
+ "In this example, by using hybrid search, the company effectively combines semantic insights and precise keyword detection to deliver efficient and accurate support resolutions, improving both customer and agent experiences.\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Another follow-up: again posted to the same session `url` defined earlier\n",
+    "message_3 = {\n",
+    "    \"messages\": [\n",
+    "        {\n",
+    "            \"type\": \"text\",\n",
+    "            \"content\": \"Can you give me an example?\"\n",
+    "        }\n",
+    "    ],\n",
+    "    \"stream_response\": False\n",
+    "}\n",
+    "\n",
+    "response = requests.post(url, headers=headers, json=message_3)\n",
+    "\n",
+    "print(\"User: Can you give me an example?\")\n",
+    "if response.status_code == 201:  # 201 Created: the event was accepted\n",
+    "    event_data = response.json()\n",
+    "    print(\"\\nAgent Response:\")\n",
+    "    # The response typically contains the assistant's message in the events\n",
+    "    if 'events' in event_data:\n",
+    "        for event in event_data['events']:\n",
+    "            if event.get('type') == 'agent_output':\n",
+    "                print(event.get('content', 'No content'))\n",
+    "    else:\n",
+    "        print(event_data)\n",
+    "else:\n",
+    "    print(f\"Error {response.status_code}: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 5: Streaming Responses\n",
+ "\n",
+ "Vectara's Agent API supports Server-Sent Events (SSE) for streaming responses in real time. Instead of waiting for the complete response, you can process text chunks, tool calls, and thinking events as they arrive. Set `stream_response: True` in your message payload to enable streaming."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:29.974647Z",
+ "iopub.status.busy": "2026-05-28T22:15:29.974391Z",
+ "iopub.status.idle": "2026-05-28T22:15:35.678196Z",
+ "shell.execute_reply": "2026-05-28T22:15:35.676615Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Streaming response:\n",
+ "\n",
+ "[Calling tool: research_paper_search]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[Tool response received: research_paper_search]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Emb"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "eddings"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " work"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " in"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " retrieval"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " systems"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " by"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " transforming"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " documents"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " and"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " queries"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " into"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " dense"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " vector"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " representations"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ","
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " allowing"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " for"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " efficient"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " comparison"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " to"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " determine"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " relev"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ancy"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "."
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Each"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " document"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " is"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " encoded"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " into"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " a"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " vector"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " through"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " a"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " document"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " encoder"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ","
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " and"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " these"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " vectors"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " are"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " stored"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " in"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " a"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Maximum"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Inner"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Product"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Search"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ("
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "M"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "IPS"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ")"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " index"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ","
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " often"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " using"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " tools"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " like"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " FA"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ISS"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "."
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " FA"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ISS"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " utilizes"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " a"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Hier"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "archical"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Navig"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "able"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Small"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " World"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " approximation"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " to"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " enable"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " fast"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " and"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " efficient"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " retrieval"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " of"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " sem"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "antically"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " similar"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " documents"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "."
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " This"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " process"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " facilitates"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " the"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " quick"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " identification"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " of"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " relevant"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " information"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " based"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " on"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " the"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " semantic"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " content"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " of"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " queries"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ["
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "research"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "_p"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "aper"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "_"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "]."
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "--- Stream complete ---\n"
+ ]
+ }
+ ],
+ "source": [
+    "import sseclient\n",
+    "\n",
+    "# Send a message with streaming enabled\n",
+    "message_data = {\n",
+    "    \"messages\": [\n",
+    "        {\n",
+    "            \"type\": \"text\",\n",
+    "            \"content\": \"Briefly explain how embeddings work in retrieval systems.\"\n",
+    "        }\n",
+    "    ],\n",
+    "    \"stream_response\": True\n",
+    "}\n",
+    "\n",
+    "url = f\"{BASE_URL}/agents/{agent_key}/sessions/{session_key}/events\"\n",
+    "# Context manager releases the streamed connection even if event handling raises\n",
+    "with requests.post(url, headers=headers, json=message_data, stream=True) as response:\n",
+    "    print(\"Streaming response:\\n\")\n",
+    "\n",
+    "    # Streaming endpoints return 200 OK (not 201 Created like the non-streaming create-event calls above)\n",
+    "    if response.ok:\n",
+    "        client = sseclient.SSEClient(response)\n",
+    "        for sse_event in client.events():\n",
+    "            try:\n",
+    "                event = json.loads(sse_event.data)\n",
+    "            except json.JSONDecodeError:\n",
+    "                continue  # skip SSE frames whose data is not JSON\n",
+    "            event_type = event.get(\"type\", \"\")\n",
+    "\n",
+    "            if event_type == \"streaming_agent_output\":\n",
+    "                # Print each text chunk as it arrives\n",
+    "                print(event.get(\"content\", \"\"), end=\"\", flush=True)\n",
+    "            elif event_type == \"streaming_agent_output_end\":\n",
+    "                print(\"\\n\\n--- Stream complete ---\")\n",
+    "            elif event_type == \"tool_input\":\n",
+    "                tool = event.get(\"tool_configuration_name\", \"unknown\")\n",
+    "                print(f\"[Calling tool: {tool}]\")\n",
+    "            elif event_type == \"tool_output\":\n",
+    "                tool = event.get(\"tool_configuration_name\", \"unknown\")\n",
+    "                print(f\"[Tool response received: {tool}]\")\n",
+    "            elif event_type == \"thinking\":\n",
+    "                print(f\"[Thinking: {event.get('content', '')[:80]}...]\")\n",
+    "    else:\n",
+    "        print(f\"Error {response.status_code}: {response.text}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/api-examples/5-sub-agents.ipynb b/notebooks/api-examples/6-sub-agents.ipynb
similarity index 72%
rename from notebooks/api-examples/5-sub-agents.ipynb
rename to notebooks/api-examples/6-sub-agents.ipynb
index 59615fc..0d62fd1 100644
--- a/notebooks/api-examples/5-sub-agents.ipynb
+++ b/notebooks/api-examples/6-sub-agents.ipynb
@@ -5,7 +5,7 @@
"id": "cell-0",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -48,11 +48,12 @@
"source": [
"## Getting Started\n",
"\n",
- "This notebook assumes you've completed Notebooks 1-4:\n",
+ "This notebook assumes you've completed Notebooks 1-5:\n",
"- Notebook 1: Created two corpora (ai-research-papers and vectara-docs)\n",
"- Notebook 2: Ingested AI research papers and Vectara documentation\n",
- "- Notebook 3: Queried the data with various techniques\n",
- "- Notebook 4: Created agents that can search and reason across data\n",
+ "- Notebook 3: Deleted documents from a corpus\n",
+ "- Notebook 4: Queried the data with various techniques\n",
+ "- Notebook 5: Created agents that can search and reason across data\n",
"\n",
"Now we'll create a multi-agent system where specialized sub-agents handle domain-specific tasks."
]
@@ -69,7 +70,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "cell-6",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:38.267234Z",
+ "iopub.status.busy": "2026-05-28T22:15:38.266599Z",
+ "iopub.status.idle": "2026-05-28T22:15:38.319147Z",
+ "shell.execute_reply": "2026-05-28T22:15:38.318943Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -111,7 +119,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "da112909",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:38.320314Z",
+ "iopub.status.busy": "2026-05-28T22:15:38.320221Z",
+ "iopub.status.idle": "2026-05-28T22:15:38.322580Z",
+ "shell.execute_reply": "2026-05-28T22:15:38.322399Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent / delete_and_create_tool).\n",
@@ -167,14 +182,27 @@
"cell_type": "code",
"execution_count": 3,
"id": "cell-10",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:38.323801Z",
+ "iopub.status.busy": "2026-05-28T22:15:38.323721Z",
+ "iopub.status.idle": "2026-05-28T22:15:44.096867Z",
+ "shell.execute_reply": "2026-05-28T22:15:44.095590Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Research Paper Analyst' (agt_research_paper_analyst_8d72)\n",
- "Created agent 'Research Paper Analyst' (key: agt_research_paper_analyst_8c1a)\n"
+ "Deleted existing agent 'Research Paper Analyst' (agt_research_paper_analyst_8c1a)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Research Paper Analyst' (key: agt_research_paper_analyst_38f5)\n"
]
}
],
@@ -266,14 +294,27 @@
"cell_type": "code",
"execution_count": 4,
"id": "cell-12",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:44.100881Z",
+ "iopub.status.busy": "2026-05-28T22:15:44.100431Z",
+ "iopub.status.idle": "2026-05-28T22:15:49.658751Z",
+ "shell.execute_reply": "2026-05-28T22:15:49.657430Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Documentation Expert' (agt_documentation_expert_52b0)\n",
- "Created agent 'Documentation Expert' (key: agt_documentation_expert_258b)\n"
+ "Deleted existing agent 'Documentation Expert' (agt_documentation_expert_258b)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Documentation Expert' (key: agt_documentation_expert_8494)\n"
]
}
],
@@ -352,14 +393,27 @@
"cell_type": "code",
"execution_count": 5,
"id": "8j6idifi4m3",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:49.664363Z",
+ "iopub.status.busy": "2026-05-28T22:15:49.663910Z",
+ "iopub.status.idle": "2026-05-28T22:15:55.921143Z",
+ "shell.execute_reply": "2026-05-28T22:15:55.920274Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted existing agent 'Web Search Expert' (agt_web_search_expert_8ec0)\n",
- "Created agent 'Web Search Expert' (key: agt_web_search_expert_5f51)\n"
+ "Deleted existing agent 'Web Search Expert' (agt_web_search_expert_5f51)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created agent 'Web Search Expert' (key: agt_web_search_expert_e837)\n"
]
}
],
@@ -433,15 +487,34 @@
"cell_type": "code",
"execution_count": 6,
"id": "e3umya5qan8",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:15:55.924700Z",
+ "iopub.status.busy": "2026-05-28T22:15:55.924511Z",
+ "iopub.status.idle": "2026-05-28T22:16:03.087599Z",
+ "shell.execute_reply": "2026-05-28T22:16:03.086023Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted orchestrator agent: agt_ai_research_orchestrator_f197\n",
- "Deleted existing tool 'vectara_api_validator' (tol_6328)\n",
- "Created tool 'vectara_api_validator' (id: tol_6329)\n"
+ "Deleted orchestrator agent: agt_ai_research_orchestrator_1378\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleted existing tool 'vectara_api_validator' (tol_6329)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created tool 'vectara_api_validator' (id: tol_6922)\n"
]
}
],
@@ -627,13 +700,20 @@
"cell_type": "code",
"execution_count": 7,
"id": "cell-14",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:16:03.092693Z",
+ "iopub.status.busy": "2026-05-28T22:16:03.092329Z",
+ "iopub.status.idle": "2026-05-28T22:16:11.981468Z",
+ "shell.execute_reply": "2026-05-28T22:16:11.980497Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'AI Research Orchestrator' (key: agt_ai_research_orchestrator_1378)\n"
+ "Created agent 'AI Research Orchestrator' (key: agt_ai_research_orchestrator_139c)\n"
]
}
],
@@ -724,7 +804,14 @@
"cell_type": "code",
"execution_count": 8,
"id": "cell-16",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:16:11.986123Z",
+ "iopub.status.busy": "2026-05-28T22:16:11.985446Z",
+ "iopub.status.idle": "2026-05-28T22:16:11.995891Z",
+ "shell.execute_reply": "2026-05-28T22:16:11.995399Z"
+ }
+ },
"outputs": [],
"source": [
"# Helper function to send messages and display responses\n",
@@ -794,13 +881,20 @@
"cell_type": "code",
"execution_count": 9,
"id": "cell-17",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:16:11.998508Z",
+ "iopub.status.busy": "2026-05-28T22:16:11.998252Z",
+ "iopub.status.idle": "2026-05-28T22:16:12.203504Z",
+ "shell.execute_reply": "2026-05-28T22:16:12.202169Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_orchestrator_session_20260506-061800_83e8\n"
+ "Session Created: ase_orchestrator_session_20260528-151611_7856\n"
]
}
],
@@ -838,7 +932,14 @@
"cell_type": "code",
"execution_count": 10,
"id": "cell-19",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:16:12.209156Z",
+ "iopub.status.busy": "2026-05-28T22:16:12.208940Z",
+ "iopub.status.idle": "2026-05-28T22:16:27.195718Z",
+ "shell.execute_reply": "2026-05-28T22:16:27.194654Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -846,39 +947,56 @@
"text": [
"User: What are the key innovations in RAG?\n",
"\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Event: input_message\n",
"Event: tool_input\n",
" Tool: research_analyst\n",
- " Input: Research the latest academic findings and theoretical advancements in Retrieval-Augmented Generation (RAG) models. Focus on key innovations and breakthroughs that have been identified in recent studies....\n",
+ " Input: Identify the latest academic insights and theoretical advancements in Retrieval-Augmented Generation (RAG) models, focusing on innovations and recent findings....\n",
"Event: tool_input\n",
" Tool: web_search_expert\n",
- " Input: Find current news, articles, and blogs that discuss recent innovations in Retrieval-Augmented Generation (RAG) technologies and models, highlighting key advancements and trends....\n",
+ " Input: Search for the latest trends, innovations, and advancements in Retrieval-Augmented Generation (RAG) models, including recent news, updates from conferences, and industry reports....\n",
"Event: tool_output\n",
" Tool: research_analyst\n",
- " Response (200 chars): I cannot retrieve the latest research on Retrieval-Augmented Generation (RAG) models at the moment due to a technical issue. Please try again later or specify another query....\n",
+ " Response (200 chars): I encountered an issue while attempting to retrieve the latest advancements in Retrieval-Augmented Generation (RAG) models from my available resources. If you have access to specific research database...\n",
"Event: tool_output\n",
" Tool: web_search_expert\n",
- " Response (200 chars): Recent advancements in Retrieval-Augmented Generation (RAG) technologies have focused on enhancing the integration of large language models (LLMs) with reliable retrieval methods to provide domain-spe...\n",
+ " Response (200 chars): Retrieval-Augmented Generation (RAG) models have seen significant advancements and innovations in 2023. Here's a summary of the latest trends and innovations:\n",
+ "\n",
+ "1. **Integration of Large Language Model...\n",
"Event: agent_output\n",
"-------------------------\n",
"\n",
"\n",
"Agent Response:\n",
"\n",
- "Recent advancements in Retrieval-Augmented Generation (RAG) technologies have been notable for several key innovations:\n",
+ "Retrieval-Augmented Generation (RAG) models have recently witnessed several key innovations and advancements, both in academic research and industry practice. Here are some highlights from the latest findings:\n",
"\n",
- "1. **Improved Retrieval Accuracy**: Enhancing the precision of information retrieval has been a major focus, allowing large language models (LLMs) to generate more accurate and contextually relevant content. This involves utilizing more sophisticated techniques to retrieve domain-specific data.\n",
+ "1. **Integration of Large Language Models (LLMs)**:\n",
+ " - RAG models now successfully integrate large language models with retrieval systems to enhance response accuracy and relevancy by including real-world context and current data. This integration allows for custom dataset utilization, which significantly enhances their adaptability and usefulness across diverse domains.\n",
"\n",
- "2. **Integration of Autonomous AI Agents**: A significant trend is the incorporation of autonomous AI agents into RAG systems, known as Agentic Retrieval-Augmented Generation (Agentic RAG). These agents help in autonomously deciding what information is needed and accessing it, improving the contextual accuracy of generated outputs [source](https://www.linkedin.com/pulse/retrieval-augmented-generation-rag-comprehensive-analysis-janvier-ienoe).\n",
+ "2. **Market Growth**:\n",
+ " - There is a marked increase in the market for RAG technologies, with projections indicating growth from USD 1.94 billion in 2025 to USD 9.86 billion by 2030, at a compound annual growth rate (CAGR) of 38.4%. This reflects a broadening interest and adoption of RAG technologies across various sectors.\n",
"\n",
- "3. **Technological Advancements**: Current developments have addressed traditional RAG limitations by improving architectures and processing speeds, making models more efficient. These improvements facilitate the broader application of RAG across different fields [source](https://arxiv.org/abs/2410.12837).\n",
+ "3. **Applications in Industry**:\n",
+ " - RAG models offer significant potential in fields that require rapid data adaptation, like medical research. They can continuously update with the latest findings, thereby preventing obsolescence in rapidly evolving disciplines.\n",
"\n",
- "4. **Application in Various Domains**: RAG technology is increasingly tailored for specific applications, such as education and enterprise solutions, highlighting its importance in delivering precise and relevant data where needed [source](https://www.sciencedirect.com/science/article/pii/S2666920X25000578).\n",
+ "4. **Technological Enhancements**:\n",
+ " - Current innovations have focused on enhancing retrieval mechanisms and integrating generative models, thereby producing more contextually relevant and data-driven responses. This enhancement is crucial for the improved accuracy and usability of language models in various applications.\n",
"\n",
- "These innovations reflect an ongoing effort to optimize the use of retrieved information in generative models, enhancing their practical value in real-world applications. For more detailed analyses, see articles from platforms like [MofoTech](https://mofotech.mofo.com/topics/retrieval-augmented-generation-rag-to-generate-richer-outputs) and [Medium](https://medium.com/nyu-ds-review/recent-evolution-of-rag-1e132df9fb36).\n"
+ "5. **Research and Analysis**:\n",
+ " - Recent research underscores the potential of RAG models across multiple fields, emphasizing the critical role of integrating external knowledge bases to optimize generative AI models.\n",
+ "\n",
+ "These developments highlight the dynamic and transformative potential of RAG models in accessing and utilizing up-to-date information, thereby contributing significantly to advancing AI technologies across various sectors. \n",
+ "\n",
+ "For further detailed information, you may refer to articles and studies from sources such as [Medium](https://medium.com/emalpha/innovations-in-retrieval-augmented-generation-8e6e70f95629), [LinkedIn](https://www.linkedin.com/pulse/retrieval-augmented-generation-rag-comprehensive-analysis-janvier-ienoe), and market research reports, which delve deeper into these advancements.\n"
]
}
],
@@ -911,7 +1029,14 @@
"cell_type": "code",
"execution_count": 11,
"id": "cell-23",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:16:27.199557Z",
+ "iopub.status.busy": "2026-05-28T22:16:27.198927Z",
+ "iopub.status.idle": "2026-05-28T22:16:49.431979Z",
+ "shell.execute_reply": "2026-05-28T22:16:49.431249Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -919,16 +1044,22 @@
"text": [
"User: Give me working code to search a corpus for AI research papers using hybrid search with Vectara's API.\n",
"\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Event: input_message\n",
"Event: tool_input\n",
" Tool: docs_expert\n",
- " Input: Provide a code example for using the Vectara API to perform a hybrid search in a corpus specifically tailored for AI research papers. Ensure the code is compatible with Vectara's v2 API and includes proper formats for the request payload....\n",
+ " Input: Provide a complete example of a JSON payload and code to perform a hybrid search on a corpus for AI research papers using Vectara's v2 API. Ensure the documentation details how to structure the request properly, and remember that the code must be validated for v2 API specifications....\n",
"Event: tool_output\n",
" Tool: docs_expert\n",
- " Response (200 chars): To perform a hybrid search using the Vectara v2 API for AI research papers, you can configure your search query to blend semantic and lexical retrieval modes. Here's a code example demonstrating this ...\n",
+ " Response (200 chars): I couldn't find a pre-existing valid JSON payload example specifically for hybrid search with the Vectara v2 API. However, I can craft a typical example for you, focusing on a hybrid search approach i...\n",
"Event: tool_input\n",
" Tool: api_validator\n",
" Validating endpoint: search\n",
@@ -940,62 +1071,111 @@
"\n",
"Agent Response:\n",
"\n",
- "Here's a validated code example to perform a hybrid search using Vectara's v2 API for AI research papers. This setup combines semantic and lexical search to provide comprehensive results:\n",
+ "Here is a working example of how to perform a hybrid search using Vectara's v2 API for AI research papers. This example includes a JSON payload and Python code for making the HTTP request. The code has been validated to ensure it adheres to the v2 API specifications.\n",
+ "\n",
+ "### JSON Payload for Hybrid Search\n",
+ "\n",
+ "```json\n",
+ "{\n",
+ " \"query\": \"AI research papers\",\n",
+ " \"search\": {\n",
+ " \"corpora\": [\n",
+ " {\n",
+ " \"corpus_key\": \"your-corpus-key\",\n",
+ " \"lexical_interpolation\": 0.5,\n",
+ " \"semantics\": \"default\"\n",
+ " }\n",
+ " ],\n",
+ " \"offset\": 0,\n",
+ " \"limit\": 10,\n",
+ " \"context_configuration\": {\n",
+ " \"sentences_before\": 1,\n",
+ " \"sentences_after\": 1\n",
+ " },\n",
+ " \"reranker\": {\n",
+ " \"type\": \"chain\",\n",
+ " \"rerankers\": [\n",
+ " {\n",
+ " \"type\": \"mmr\",\n",
+ " \"diversity_bias\": 0.1\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " \"metadata_filters\": [\n",
+ " {\n",
+ " \"key\": \"field\",\n",
+ " \"value\": \"ai-research\"\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " \"generation\": {\n",
+ " \"enabled\": false\n",
+ " },\n",
+ " \"save_history\": false\n",
+ "}\n",
+ "```\n",
+ "\n",
+ "### Python Code for Making the API Request\n",
"\n",
"```python\n",
"import requests\n",
+ "import json\n",
"\n",
- "# Define the Vectara API endpoint for searching\n",
"url = \"https://api.vectara.io/v2/search\"\n",
- "\n",
- "# Construct the headers with your API key\n",
"headers = {\n",
- " \"Authorization\": \"Bearer YOUR_API_KEY\",\n",
- " \"Content-Type\": \"application/json\"\n",
+ " \"Content-Type\": \"application/json\",\n",
+ " \"Authorization\": \"Bearer YOUR_API_KEY\"\n",
"}\n",
"\n",
- "# Define the payload for the hybrid search\n",
"payload = {\n",
- " \"query\": {\n",
- " \"query\": \"AI research papers\",\n",
- " \"c\": [\n",
+ " \"query\": \"AI research papers\",\n",
+ " \"search\": {\n",
+ " \"corpora\": [\n",
" {\n",
- " \"corpus_key\": \"your_corpus_key\",\n",
- " \"lexical_interpolation\": 0.025, # 2.5% lexical interpolation\n",
+ " \"corpus_key\": \"your-corpus-key\",\n",
+ " \"lexical_interpolation\": 0.5,\n",
" \"semantics\": \"default\"\n",
" }\n",
" ],\n",
" \"offset\": 0,\n",
- " \"limit\": 10, # Limit to top 10 results\n",
- " \"context_config\": {\n",
- " \"sentences_before\": 2,\n",
- " \"sentences_after\": 2\n",
+ " \"limit\": 10,\n",
+ " \"context_configuration\": {\n",
+ " \"sentences_before\": 1,\n",
+ " \"sentences_after\": 1\n",
" },\n",
" \"reranker\": {\n",
- " \"type\": \"mmr\", # Use Maximal Marginal Relevance for re-ranking\n",
- " \"diversity_bias\": 0.05 # Small diversity bias\n",
- " }\n",
- " }\n",
+ " \"type\": \"chain\",\n",
+ " \"rerankers\": [\n",
+ " {\n",
+ " \"type\": \"mmr\",\n",
+ " \"diversity_bias\": 0.1\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " \"metadata_filters\": [\n",
+ " {\n",
+ " \"key\": \"field\",\n",
+ " \"value\": \"ai-research\"\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " \"generation\": {\n",
+ " \"enabled\": false\n",
+ " },\n",
+ " \"save_history\": false\n",
"}\n",
"\n",
- "# Send the search request\n",
- "response = requests.post(url, json=payload, headers=headers)\n",
+ "response = requests.post(url, headers=headers, data=json.dumps(payload))\n",
"\n",
- "# Check the response status and handle errors if any\n",
- "if response.status_code == 200:\n",
- " search_results = response.json()\n",
- " print(search_results)\n",
- "else:\n",
- " print(f\"Error: {response.status_code}, {response.text}\")\n",
+ "print(response.json())\n",
"```\n",
"\n",
- "### Key Configuration Options\n",
- "\n",
- "- **`lexical_interpolation`**: Controls the balance between semantic and lexical search. A lower value (0.025) emphasizes semantic search with a touch of lexical matching.\n",
- "- **`reranker`**: The `mmr` (Maximal Marginal Relevance) helps balance relevance and diversity.\n",
- "- **`context_config`**: Defines the number of sentences to show before and after each match.\n",
+ "### Explanation\n",
+ "- **`corpus_key`**: Replace with your specific corpus identifier.\n",
+ "- **`lexical_interpolation`**: Adjusts the balance between semantic and lexical search.\n",
+ "- **`metadata_filters`**: Ensures the search is filtered to include only AI research papers.\n",
"\n",
- "Replace `\"YOUR_API_KEY\"` and `\"your_corpus_key\"` with your actual API key and the specific corpus key for AI research papers. This code will give a mix of semantic and lexical search results tailored for AI research topics. For more configurations, you can refer to the [Vectara Documentation](https://docs.vectara.com).\n"
+ "Ensure you replace `\"YOUR_API_KEY\"` and `\"your-corpus-key\"` with actual values from your access credentials. This setup allows you to perform a hybrid search effectively, using both semantic and lexical components for enhanced search capabilities.\n"
]
}
],
@@ -1028,7 +1208,14 @@
"cell_type": "code",
"execution_count": 12,
"id": "7elqfu25p1k",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:16:49.434619Z",
+ "iopub.status.busy": "2026-05-28T22:16:49.434398Z",
+ "iopub.status.idle": "2026-05-28T22:17:02.540953Z",
+ "shell.execute_reply": "2026-05-28T22:17:02.540421Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -1036,35 +1223,45 @@
"text": [
"User: What are the latest developments in RAG technology in 2025? Are there any new techniques or frameworks that have emerged recently?\n",
"\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Event: input_message\n",
"Event: tool_input\n",
" Tool: web_search_expert\n",
- " Input: Search for the most recent developments in Retrieval-Augmented Generation (RAG) technology as of 2025. Specifically, look for new techniques, frameworks, or innovations that have emerged recently in this field....\n",
+ " Input: Identify the most recent developments in Retrieval-Augmented Generation (RAG) technology up to the year 2025, focusing on any newly emerged techniques, frameworks, or notable innovations in the field....\n",
"Event: tool_output\n",
" Tool: web_search_expert\n",
- " Response (200 chars): As of 2025, Retrieval-Augmented Generation (RAG) technology has seen significant advancements. Here are some of the most recent developments in this field:\n",
+ " Response (200 chars): Recent developments in Retrieval-Augmented Generation (RAG) technology highlight its transformative impact and rapid growth in various industries by 2025. Here are some of the latest advancements:\n",
"\n",
- "1. **Adaptive Retrieval Mechanisms**: New t...\n",
+ "1....\n",
"Event: agent_output\n",
"-------------------------\n",
"\n",
"\n",
"Agent Response:\n",
"\n",
- "As of 2025, Retrieval-Augmented Generation (RAG) technology has undergone significant advancements, introducing new techniques and frameworks that further enhance its capabilities:\n",
+ "As of 2025, Retrieval-Augmented Generation (RAG) technology has seen several notable advancements that highlight its increasing impact and adoption across various industries. Here are some of the latest developments:\n",
"\n",
- "1. **Adaptive Retrieval Mechanisms**: Newly developed techniques focus on adaptive retrieval that can dynamically adjust based on user intent and context. This approach significantly improves the precision and relevance of the information retrieved, offering a more tailored user experience [source](https://www.chitika.com/retrieval-augmented-generation-rag-the-definitive-guide-2025/).\n",
+ "1. **Market Growth and Adoption**:\n",
+ " - The RAG market is projected to grow substantially, from USD 1.94 billion in 2025 to USD 9.86 billion by 2030, translating to a compound annual growth rate (CAGR) of 38.4%. This significant growth is mainly driven by adoption in key sectors such as healthcare, finance, and e-commerce, where RAG enhances output accuracy and context-awareness. It powers applications like real-time chatbots, semantic search engines, and personalized knowledge retrieval systems, illustrating its versatility and effectiveness across different use cases ([MarketsandMarkets](https://www.marketsandmarkets.com/Market-Reports/retrieval-augmented-generation-rag-market-135976317.html)).\n",
"\n",
- "2. **Agentic RAG**: Recent innovations have introduced Agentic RAG, which enables models to display planning and decision-making abilities. This allows RAG systems to function more autonomously, moving beyond rigid, pre-defined paths [source](https://ai.plainenglish.io/rag-in-2025-the-new-evolution-of-retrieval-augmented-generation-with-real-world-examples-12347636b6fe).\n",
+ "2. **Innovations in RAG Architectures**:\n",
+ " - New architectures, such as Multi-RAG systems, are emerging, designed specifically for multimodal applications like adaptive video understanding. These advancements suggest significant progress in processing and generating content from diverse data types, enhancing the AI's ability to interact with multiple modalities ([LinkedIn Article](https://www.linkedin.com/pulse/retrieval-augmented-generation-rag-comprehensive-analysis-janvier-ienoe)).\n",
"\n",
- "3. **Real-time Knowledge Graphs**: The integration of real-time knowledge graphs has become a significant component in RAG technology. This development helps in presenting up-to-date and contextually relevant data within the generation process, further refining the accuracy and applicability of the content generated [source](https://www.linkedin.com/pulse/state-retrieval-augmented-generation-2025-why-we-hern%C3%A1ndez-guti%C3%A9rrez-rnq0c).\n",
+ "3. **Technical Evolutions**:\n",
+ " - Technical advancements in RAG involve sophisticated integration mechanisms that allow retrieval components to work seamlessly with generative models. This integration taps into vast external databases, enhancing the capabilities of AI systems in tasks such as chatbots and content generation, making the systems more efficient and adaptable ([Glean](https://www.glean.com/blog/rag-retrieval-augmented-generation)).\n",
"\n",
- "4. **Multimodal Retrieval**: Progress has also been made in expanding RAG to incorporate multimodal retrieval, which combines textual data with images and other forms of data. This broadens the scope and accuracy of information that can be generated, thus improving the systems' ability to serve complex queries [source](https://www.linkedin.com/pulse/state-retrieval-augmented-generation-2025-why-we-hern%C3%A1ndez-guti%C3%A9rrez-rnq0c).\n",
+ "4. **Security Enhancements**:\n",
+ " - As RAG systems mature, ensuring secure data access and maintaining user privacy is a growing priority. New implementations are being developed that include robust user permissions and protect data integrity while utilizing RAG technology, thereby addressing privacy concerns effectively ([LinkedIn Article](https://www.linkedin.com/pulse/retrieval-augmented-generation-rag-comprehensive-analysis-janvier-ienoe)).\n",
"\n",
- "These advancements reflect a substantial enhancement in RAG systems' capabilities, demonstrating their increased utility and application across diverse industries.\n"
+ "These advancements highlight a trend toward developing more integrated, secure, and versatile AI systems. By leveraging extensive external knowledge, RAG technology is poised to further enhance human-AI interactions and provide more value across various applications.\n"
]
}
],
diff --git a/notebooks/api-examples/6-artifacts.ipynb b/notebooks/api-examples/7-artifacts.ipynb
similarity index 77%
rename from notebooks/api-examples/6-artifacts.ipynb
rename to notebooks/api-examples/7-artifacts.ipynb
index e461941..7c9a567 100644
--- a/notebooks/api-examples/6-artifacts.ipynb
+++ b/notebooks/api-examples/7-artifacts.ipynb
@@ -5,7 +5,7 @@
"id": "cell-0",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -47,12 +47,13 @@
"source": [
"## Getting Started\n",
"\n",
- "This notebook assumes you've completed Notebooks 1-5:\n",
+ "This notebook assumes you've completed Notebooks 1-6:\n",
"- Notebook 1: Created corpora\n",
"- Notebook 2: Ingested data\n",
- "- Notebook 3: Queried data\n",
- "- Notebook 4: Created agents and sessions\n",
- "- Notebook 5: Built multi-agent workflows with sub-agents\n",
+ "- Notebook 3: Deleted documents from a corpus\n",
+ "- Notebook 4: Queried data\n",
+ "- Notebook 5: Created agents and sessions\n",
+ "- Notebook 6: Built multi-agent workflows with sub-agents\n",
"\n",
"Now we'll extend agent capabilities by working with file artifacts."
]
@@ -69,7 +70,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "64a7d951-e1aa-4e3d-b97a-8d32baf97715",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:05.156618Z",
+ "iopub.status.busy": "2026-05-28T22:17:05.156360Z",
+ "iopub.status.idle": "2026-05-28T22:17:05.160961Z",
+ "shell.execute_reply": "2026-05-28T22:17:05.160431Z"
+ }
+ },
"outputs": [],
"source": [
"# No additional dependencies required beyond requests"
@@ -79,7 +87,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "cell-6",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:05.163110Z",
+ "iopub.status.busy": "2026-05-28T22:17:05.162916Z",
+ "iopub.status.idle": "2026-05-28T22:17:05.219013Z",
+ "shell.execute_reply": "2026-05-28T22:17:05.218776Z"
+ }
+ },
"outputs": [],
"source": [
"import os\n",
@@ -129,7 +144,14 @@
"cell_type": "code",
"execution_count": 3,
"id": "cell-8",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:05.220487Z",
+ "iopub.status.busy": "2026-05-28T22:17:05.220363Z",
+ "iopub.status.idle": "2026-05-28T22:17:05.222748Z",
+ "shell.execute_reply": "2026-05-28T22:17:05.222542Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent / delete_and_create_tool).\n",
@@ -153,13 +175,20 @@
"cell_type": "code",
"execution_count": 4,
"id": "cell-9",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:05.223790Z",
+ "iopub.status.busy": "2026-05-28T22:17:05.223697Z",
+ "iopub.status.idle": "2026-05-28T22:17:09.035396Z",
+ "shell.execute_reply": "2026-05-28T22:17:09.034770Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Document Analyst' (key: agt_document_analyst_c291)\n"
+ "Created agent 'Document Analyst' (key: agt_document_analyst_0cf2)\n"
]
}
],
@@ -217,14 +246,21 @@
"cell_type": "code",
"execution_count": 5,
"id": "cell-11",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:09.039109Z",
+ "iopub.status.busy": "2026-05-28T22:17:09.038803Z",
+ "iopub.status.idle": "2026-05-28T22:17:09.314719Z",
+ "shell.execute_reply": "2026-05-28T22:17:09.313865Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "✓ Session Created: ase_artifact_demo_20260506-062049_8113\n",
- " Session Name: Artifact Demo 20260506-062049\n"
+ "✓ Session Created: ase_artifact_demo_20260528-151709_0f3c\n",
+ " Session Name: Artifact Demo 20260528-151709\n"
]
}
],
@@ -276,7 +312,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "cell-13",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:09.318206Z",
+ "iopub.status.busy": "2026-05-28T22:17:09.317759Z",
+ "iopub.status.idle": "2026-05-28T22:17:09.322430Z",
+ "shell.execute_reply": "2026-05-28T22:17:09.321977Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -332,7 +375,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "cell-14",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:09.325127Z",
+ "iopub.status.busy": "2026-05-28T22:17:09.324840Z",
+ "iopub.status.idle": "2026-05-28T22:17:14.076253Z",
+ "shell.execute_reply": "2026-05-28T22:17:14.075070Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -342,43 +392,45 @@
"{\n",
" \"events\": [\n",
" {\n",
- " \"id\": \"aev_232fb1fc-7812-461c-a426-dbba70de1c6c\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:20:49.844Z\",\n",
+ " \"id\": \"aev_b4b09e35-9165-4155-9433-f1b5384e7fcf\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:09.627Z\",\n",
" \"type\": \"artifact_upload\",\n",
" \"artifacts\": [\n",
" {\n",
- " \"artifact_id\": \"art_fresh_rotating_gouda\",\n",
+ " \"artifact_id\": \"art_ring_agate_bearskin\",\n",
" \"filename\": \"q3_sales_report.md\",\n",
" \"mime_type\": \"text/markdown\",\n",
" \"size_bytes\": 774\n",
" }\n",
- " ]\n",
+ " ],\n",
+ " \"agent_upload_message\": \"A file has been uploaded to your workspace:\\n\\n- **q3_sales_report.md** (text/markdown, 774 B) \\u2014 artifact_id: `art_ring_agate_bearskin`\\n\\nUse tools to access the full content if needed.\"\n",
" },\n",
" {\n",
- " \"id\": \"aev_2ada433d-6fb3-471a-8538-f0bfe22e8435\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:20:50.894Z\",\n",
+ " \"id\": \"aev_615e7811-6d3a-4b84-baf8-9884cd970d86\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:10.101Z\",\n",
" \"type\": \"tool_input\",\n",
- " \"tool_call_id\": \"call_bYVo4mDGvw5ujNQG4KuWeSGb\",\n",
+ " \"tool_call_id\": \"call_cnrfvzrMKP3qzsDhbXTyN5WJ\",\n",
" \"tool_configuration_name\": \"artifact_read\",\n",
" \"tool_name\": \"artifact_read\",\n",
" \"tool_input\": {\n",
- " \"artifact_id\": \"art_fresh_rotating_gouda\",\n",
+ " \"artifact_id\": \"art_ring_agate_bearskin\",\n",
" \"encoding\": \"raw\"\n",
" },\n",
- " \"tool_type\": \"artifact_read\"\n",
+ " \"tool_type\": \"artifact_read\",\n",
+ " \"argument_override_paths\": []\n",
" },\n",
" {\n",
- " \"id\": \"aev_3462fbc1-47a9-423a-b70c-4a11ba825b00\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:20:51.039Z\",\n",
+ " \"id\": \"aev_3962bbe1-fa42-44e7-b375-0d575d0ffe2b\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:10.157Z\",\n",
" \"type\": \"tool_output\",\n",
- " \"tool_call_id\": \"call_bYVo4mDGvw5ujNQG4KuWeSGb\",\n",
+ " \"tool_call_id\": \"call_cnrfvzrMKP3qzsDhbXTyN5WJ\",\n",
" \"tool_configuration_name\": \"artifact_read\",\n",
" \"tool_name\": \"artifact_read\",\n",
" \"tool_output\": {\n",
- " \"artifact_id\": \"art_fresh_rotating_gouda\",\n",
+ " \"artifact_id\": \"art_ring_agate_bearskin\",\n",
" \"content\": \"# Quarterly Sales Report - Q3 2024\\n\\n## Executive Summary\\nQ3 2024 showed strong growth across all product lines with total revenue of $4.2M,\\nrepresenting a 23% increase over Q2 2024.\\n\\n## Key Metrics\\n- Total Revenue: $4,200,000\\n- New Customers: 127\\n- Customer Retention Rate: 94%\\n- Average Deal Size: $33,071\\n\\n## Product Performance\\n1. Enterprise Suite: $2.1M (50% of revenue)\\n2. Professional Plan: $1.3M (31% of revenue)\\n3. Starter Plan: $800K (19% of revenue)\\n\\n## Regional Breakdown\\n- North America: 45% ($1.89M)\\n- Europe: 30% ($1.26M)\\n- Asia Pacific: 25% ($1.05M)\\n\\n## Q4 Outlook\\nBased on current pipeline, we project Q4 revenue of $4.8-5.2M.\\nKey initiatives include:\\n- Launch of new AI features\\n- Expansion into Latin American markets\\n- Enterprise customer success program\\n\",\n",
" \"filename\": \"q3_sales_report.md\",\n",
" \"mime_type\": \"text/markdown\",\n",
@@ -391,17 +443,17 @@
" \"error\": false\n",
" },\n",
" {\n",
- " \"id\": \"aev_8872d035-6956-4f50-8311-82c9d4c695ae\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:20:52.693Z\",\n",
+ " \"id\": \"aev_e2a54229-6af4-485a-b6ce-9e0501489e03\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:14.073Z\",\n",
" \"type\": \"agent_output\",\n",
- " \"content\": \"The content of the \\\"q3_sales_report.md\\\" file is as follows:\\n\\n# Quarterly Sales Report - Q3 2024\\n\\n## Executive Summary\\nQ3 2024 showed strong growth across all product lines with a total revenue of $4.2M, representing a 23% increase over Q2 2024.\\n\\n## Key Metrics\\n- Total Revenue: $4,200,000\\n- New Customers: 127\\n- Customer Retention Rate: 94%\\n- Average Deal Size: $33,071\\n\\n## Product Performance\\n1. Enterprise Suite: $2.1M (50% of revenue)\\n2. Professional Plan: $1.3M (31% of revenue)\\n3. Starter Plan: $800K (19% of revenue)\\n\\n## Regional Breakdown\\n- North America: 45% ($1.89M)\\n- Europe: 30% ($1.26M)\\n- Asia Pacific: 25% ($1.05M)\\n\\n## Q4 Outlook\\nBased on the current pipeline, we project Q4 revenue of $4.8-5.2M. Key initiatives include:\\n- Launch of new AI features\\n- Expansion into Latin American markets\\n- Enterprise customer success program\\n\\nThis report provides a comprehensive overview of the sales performance in Q3 2024, with detailed metrics, product performance, and regional sales details, alongside projections and strategic plans for Q4.\"\n",
+ " \"content\": \"The **Q3 Sales Report** provides a comprehensive overview of the financial performance for the third quarter of 2024. Here's a detailed summary of the report:\\n\\n### Executive Summary\\n- **Total Revenue:** $4.2 million, demonstrating a 23% increase over the previous quarter (Q2 2024).\\n\\n### Key Metrics\\n- **New Customers Acquired:** 127\\n- **Customer Retention Rate:** 94%\\n- **Average Deal Size:** $33,071\\n\\n### Product Performance\\n- **Enterprise Suite:** $2.1 million (50% of total revenue)\\n- **Professional Plan:** $1.3 million (31% of total revenue)\\n- **Starter Plan:** $800,000 (19% of total revenue)\\n\\n### Regional Breakdown\\n- **North America:** 45% of revenue, amounting to $1.89 million\\n- **Europe:** 30% of revenue, amounting to $1.26 million\\n- **Asia Pacific:** 25% of revenue, amounting to $1.05 million\\n\\n### Q4 Outlook\\n- Projected Q4 revenue ranges from $4.8 million to $5.2 million.\\n- Strategic initiatives include:\\n - Launching new AI features\\n - Expanding into Latin American markets\\n - Implementing a customer success program for enterprise clients\\n\\nThis report outlines strong growth patterns and provides insights into future expansions and initiatives, showing a continued positive outlook for the company's financial status into the next quarter.\"\n",
" }\n",
" ],\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\"\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\"\n",
"}\n",
"\n",
- "Artifact ID: art_fresh_rotating_gouda\n"
+ "Artifact ID: art_ring_agate_bearskin\n"
]
}
],
@@ -456,7 +508,14 @@
"cell_type": "code",
"execution_count": 8,
"id": "cell-16",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:14.082468Z",
+ "iopub.status.busy": "2026-05-28T22:17:14.081981Z",
+ "iopub.status.idle": "2026-05-28T22:17:14.222283Z",
+ "shell.execute_reply": "2026-05-28T22:17:14.220685Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -466,7 +525,7 @@
"{\n",
" \"artifacts\": [\n",
" {\n",
- " \"artifact_id\": \"art_fresh_rotating_gouda\",\n",
+ " \"artifact_id\": \"art_ring_agate_bearskin\",\n",
" \"filename\": \"q3_sales_report.md\",\n",
" \"mime_type\": \"text/markdown\",\n",
" \"size_bytes\": 774,\n",
@@ -475,12 +534,12 @@
" \"original_filename\": \"q3_sales_report.md\"\n",
" },\n",
" \"ttl_days\": 30,\n",
- " \"created_at\": \"2026-05-06T13:20:49.685Z\",\n",
- " \"updated_at\": \"2026-05-06T13:20:49.685Z\"\n",
+ " \"created_at\": \"2026-05-28T22:17:09.500Z\",\n",
+ " \"updated_at\": \"2026-05-28T22:17:09.500Z\"\n",
" }\n",
" ],\n",
" \"metadata\": {\n",
- " \"page_key\": \"AlIaqMP9vVVs2Ns8Bf3A89ZVScfUHvmowoTDlmx1ZIazD_RR37XM4r_CwqpKBBYjOLZW6wjTbMPnt9iYiXThSaXjmph6yJOSTzpPHxgDQ7prAUffK_h61ITYte-V0ppzjhNmCm_IR19ld1IiY9bCpAMG2zr1hJbO4vq8Opj9\"\n",
+ " \"page_key\": \"N_MVNpdk3e7sG8s6RFvEiAiDr07TiRZPr5DR9bP1XCBzRifgHIqbyWXatCH_QNGcg4KBcw4l4CI0iIEL8qilzUkVqjPmV0ptx-4r_O6pts-OK-KLdD9aoJyeirOyFlDuQCw0EPSahIAZbBpmM8jnjJD-HghPB6LZJ9k9hNXR\"\n",
" }\n",
"}\n"
]
@@ -517,7 +576,14 @@
"cell_type": "code",
"execution_count": 9,
"id": "cell-20",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:14.227257Z",
+ "iopub.status.busy": "2026-05-28T22:17:14.226831Z",
+ "iopub.status.idle": "2026-05-28T22:17:14.234986Z",
+ "shell.execute_reply": "2026-05-28T22:17:14.234226Z"
+ }
+ },
"outputs": [],
"source": [
"# Helper function to chat with the agent\n",
@@ -566,7 +632,14 @@
"cell_type": "code",
"execution_count": 10,
"id": "cell-21",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:14.238240Z",
+ "iopub.status.busy": "2026-05-28T22:17:14.237901Z",
+ "iopub.status.idle": "2026-05-28T22:17:20.081639Z",
+ "shell.execute_reply": "2026-05-28T22:17:20.080586Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -575,7 +648,13 @@
"User: I've uploaded a Q3 sales report. Can you analyze it and tell me the key insights?\n",
"\n",
"================================================================================\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Event: input_message\n",
@@ -584,30 +663,28 @@
"\n",
"Agent Response:\n",
"\n",
- "Based on the Q3 2024 Sales Report, here are the key insights:\n",
+ "The Q3 sales report provides several key insights into the company's performance for the third quarter of 2024:\n",
"\n",
- "1. **Strong Revenue Growth**: The total revenue for Q3 2024 was $4.2 million, marking a substantial increase of 23% over the previous quarter (Q2 2024). This indicates robust growth across all product lines.\n",
+ "### Financial Performance\n",
+ "- **Total Revenue for Q3:** $4.2 million, which is a significant 23% increase from Q2 2024. This indicates strong growth and effective sales strategies over the previous quarter.\n",
"\n",
- "2. **Key Metrics**:\n",
- " - The company acquired 127 new customers.\n",
- " - There was a high customer retention rate of 94%, suggesting strong customer satisfaction and loyalty.\n",
- " - The average deal size was $33,071, reflecting healthy transaction values.\n",
+ "### Customer Metrics\n",
+ "- **New Customers:** The company attracted 127 new customers, suggesting effective customer acquisition strategies.\n",
+ "- **Customer Retention Rate:** High at 94%, indicating strong customer satisfaction and loyalty.\n",
+ "- **Average Deal Size:** $33,071, reflecting the value of transactions and possibly a focus on higher-value customer segments.\n",
"\n",
- "3. **Product Performance**:\n",
- " - The Enterprise Suite was the top performer, contributing $2.1 million, which is 50% of the total revenue.\n",
- " - The Professional Plan generated $1.3 million, accounting for 31% of revenue.\n",
- " - The Starter Plan brought in $800K, which is 19% of the total revenue.\n",
+ "### Product Line Performance\n",
+ "- The **Enterprise Suite** generated $2.1 million, accounting for 50% of total revenue. This shows its strong positioning as the leading product offering.\n",
+ "- The **Professional Plan** followed with $1.3 million (31% of revenue), and the **Starter Plan** brought in $800,000 (19% of revenue), indicating that all product tiers are contributing significantly to the revenue stream.\n",
"\n",
- "4. **Geographical Insights**:\n",
- " - North America was the leading region, making up 45% of the total revenue with $1.89 million.\n",
- " - Europe contributed 30% ($1.26 million).\n",
- " - Asia Pacific accounted for 25% ($1.05 million).\n",
+ "### Geographic Revenue Distribution\n",
+ "- The company's revenue is well distributed across regions with **North America** leading at 45% ($1.89 million), followed by **Europe** at 30% ($1.26 million), and **Asia Pacific** at 25% ($1.05 million). This shows a strong global market presence.\n",
"\n",
- "5. **Q4 Outlook**:\n",
- " - The revenue for Q4 2024 is projected to be between $4.8 million and $5.2 million, based on the current sales pipeline.\n",
- " - Strategic initiatives include launching new AI features, expanding into Latin American markets, and implementing an enterprise customer success program.\n",
+ "### Future Outlook (Q4 Projections)\n",
+ "- The company anticipates revenue between $4.8 million and $5.2 million for Q4, suggesting confidence in continued growth.\n",
+ "- Key initiatives planned include launching new AI features, expanding into Latin American markets, and developing an enterprise customer success program, highlighting the company's focus on innovation, market expansion, and customer relationship enhancements.\n",
"\n",
- "These insights highlight strong all-around performance in Q3 2024 and a strategic focus for continued growth in Q4. The report showcases effective sales strategies, product performance across different regions, and a forward-looking plan aimed at sustaining momentum.\n"
+ "These insights provide a comprehensive view of the company's current strengths and strategic direction for future growth.\n"
]
}
],
@@ -641,7 +718,14 @@
"cell_type": "code",
"execution_count": 11,
"id": "cell-23",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:20.086570Z",
+ "iopub.status.busy": "2026-05-28T22:17:20.085945Z",
+ "iopub.status.idle": "2026-05-28T22:17:28.180483Z",
+ "shell.execute_reply": "2026-05-28T22:17:28.179434Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -650,7 +734,13 @@
"User: Based on the sales report, please create a one-page high level executive brief that I can share with the board. Save it as an artifact.\n",
"\n",
"================================================================================\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Event: input_message\n",
@@ -664,43 +754,43 @@
"\n",
"Agent Response:\n",
"\n",
- "Here is a draft of the executive brief based on the Q3 2024 Sales Report:\n",
+ "Here's a high-level executive brief based on the Q3 sales report, which you can share with the board:\n",
"\n",
"---\n",
"\n",
- "**Executive Brief: Q3 2024 Performance Overview**\n",
+ "# Executive Brief: Q3 2024 Sales Performance\n",
"\n",
- "**Introduction**\n",
- "Q3 2024 marked an exceptional quarter for our organization, demonstrating strong sales and growth metrics across our product lines and geographies. Our strategies in customer acquisition and retention have yielded impressive results, positioning us favorably for future prospects.\n",
+ "## Overview\n",
+ "The third quarter of 2024 demonstrated remarkable growth and operational success. Total quarterly revenue reached $4.2 million, marking a substantial 23% increase over Q2 2024.\n",
"\n",
- "**Financial Highlights**\n",
- "- **Total Revenue**: Achieved $4.2 million, up by 23% quarter-over-quarter.\n",
- "- **New Customers**: Successfully added 127 new clients, enhancing our market penetration.\n",
- "- **Customer Retention**: Maintained a robust retention rate of 94%, highlighting customer satisfaction.\n",
- "- **Average Deal Size**: Recorded at $33,071, reflecting the value delivered to clients.\n",
+ "## Financial Highlights\n",
+ "- **Total Revenue:** $4.2 million\n",
+ "- **Revenue Growth:** 23% over Q2 2024\n",
+ "- **New Customers Acquired:** 127\n",
+ "- **Customer Retention Rate:** 94%\n",
+ "- **Average Deal Size:** $33,071\n",
"\n",
- "**Product Line Performance**\n",
- "- **Enterprise Suite**: Dominated with $2.1 million, contributing to 50% of total revenue.\n",
- "- **Professional Plan**: Secured $1.3 million, making up 31% of our revenue stream.\n",
- "- **Starter Plan**: Accounted for $800K, representing 19% of total revenues.\n",
+ "## Product Line Performance\n",
+ "- **Enterprise Suite:** Generated $2.1 million (50% of total revenue)\n",
+ "- **Professional Plan:** Generated $1.3 million (31% of total revenue)\n",
+ "- **Starter Plan:** Generated $800,000 (19% of total revenue)\n",
"\n",
- "**Regional Performance**\n",
- "- **North America**: Led with 45% of revenue, bringing in $1.89 million.\n",
- "- **Europe**: Contributed 30%, translating to $1.26 million.\n",
- "- **Asia Pacific**: Accounted for 25%, equivalent to $1.05 million.\n",
+ "## Regional Performance\n",
+ "- **North America:** Accounted for 45% of revenue, totaling $1.89 million\n",
+ "- **Europe:** Accounted for 30% of revenue, totaling $1.26 million\n",
+ "- **Asia Pacific:** Accounted for 25% of revenue, totaling $1.05 million\n",
"\n",
- "**Outlook for Q4 2024**\n",
- "Projections indicate revenue will fall between $4.8 and $5.2 million. Strategic initiatives include:\n",
- "- Launch of cutting-edge AI features to enhance product offerings.\n",
- "- Expansion into promising Latin American markets.\n",
- "- Strengthening our Enterprise Customer Success program to boost retention and satisfaction.\n",
+ "## Strategic Initiatives and Outlook\n",
+ "Looking ahead to Q4, expected revenue is between $4.8 million and $5.2 million. Key strategic initiatives include:\n",
+ "- **AI Features:** Launch of new AI-driven product enhancements\n",
+ "- **Market Expansion:** Focus on entering and establishing presence in Latin American markets\n",
+ "- **Customer Success:** Development of an enterprise-level customer success program\n",
"\n",
- "**Conclusion**\n",
- "Q3's results reflect our team's dedication and the efficacy of our business strategies. Moving forward, we are committed to further innovation and market expansion, ensuring sustained growth and shareholder value.\n",
+ "The continued execution of these strategies is expected to further strengthen our market position and drive sustained growth.\n",
"\n",
"---\n",
"\n",
- "I will now create this executive brief as a document file and save it as an artifact for you.\n"
+ "I will now save this brief as a document artifact for distribution.\n"
]
}
],
@@ -724,7 +814,14 @@
"cell_type": "code",
"execution_count": 12,
"id": "cell-24",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:28.184029Z",
+ "iopub.status.busy": "2026-05-28T22:17:28.183763Z",
+ "iopub.status.idle": "2026-05-28T22:17:28.333621Z",
+ "shell.execute_reply": "2026-05-28T22:17:28.332549Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -734,7 +831,7 @@
"{\n",
" \"artifacts\": [\n",
" {\n",
- " \"artifact_id\": \"art_poised_mesquite_cashew\",\n",
+ " \"artifact_id\": \"art_cultured_season_college\",\n",
" \"filename\": \"q3_sales_report.md.md\",\n",
" \"mime_type\": \"text/markdown\",\n",
" \"size_bytes\": 822,\n",
@@ -742,15 +839,15 @@
" \"metadata\": {\n",
" \"output_format\": \"markdown\",\n",
" \"original_format\": \"text/markdown\",\n",
- " \"original_artifact_id\": \"art_fresh_rotating_gouda\"\n",
+ " \"original_artifact_id\": \"art_ring_agate_bearskin\"\n",
" },\n",
- " \"description\": \"Executive Brief based on Q3 2024 Sales Report\",\n",
+ " \"description\": \"Executive Brief for Board - Q3 2024 Sales Performance\",\n",
" \"ttl_days\": 30,\n",
- " \"created_at\": \"2026-05-06T13:20:59.731Z\",\n",
- " \"updated_at\": \"2026-05-06T13:20:59.731Z\"\n",
+ " \"created_at\": \"2026-05-28T22:17:25.794Z\",\n",
+ " \"updated_at\": \"2026-05-28T22:17:25.794Z\"\n",
" },\n",
" {\n",
- " \"artifact_id\": \"art_fresh_rotating_gouda\",\n",
+ " \"artifact_id\": \"art_ring_agate_bearskin\",\n",
" \"filename\": \"q3_sales_report.md\",\n",
" \"mime_type\": \"text/markdown\",\n",
" \"size_bytes\": 774,\n",
@@ -759,12 +856,12 @@
" \"original_filename\": \"q3_sales_report.md\"\n",
" },\n",
" \"ttl_days\": 30,\n",
- " \"created_at\": \"2026-05-06T13:20:49.685Z\",\n",
- " \"updated_at\": \"2026-05-06T13:20:49.685Z\"\n",
+ " \"created_at\": \"2026-05-28T22:17:09.500Z\",\n",
+ " \"updated_at\": \"2026-05-28T22:17:09.500Z\"\n",
" }\n",
" ],\n",
" \"metadata\": {\n",
- " \"page_key\": \"njVgfnvXtKyTClFBzuMRM0F1F0ES2tXo5oRdNZSiCw_IjGw02BJTtI8ZQjphASWPcIkWr7_m9byQWtLpcRKRfblcdHchDNyM3pMFjAJgprhnkDRdLEMZKb2338Bt6H9QOfHFEJwA7QSfcPHUwupcbXHD98YDRVgMAG56m-zw\"\n",
+ " \"page_key\": \"sjmW6-KDIvFdz4wJY2m3lFFhFyQLjw73JrdksAURFMedKagz27Ku-cOmIz9PxqXyIFxoZooUtcdNmArU8o9Tsoz-Fg3JackA8jkx5cCp7_M_vtGRTsu4LHAJN89qMy731j-O2GCFLCYtNVfU4s5YI2wVUEqhV0pvBtovM9pD\"\n",
" }\n",
"}\n"
]
@@ -798,7 +895,14 @@
"cell_type": "code",
"execution_count": 13,
"id": "cell-26",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:28.337457Z",
+ "iopub.status.busy": "2026-05-28T22:17:28.336907Z",
+ "iopub.status.idle": "2026-05-28T22:17:31.424749Z",
+ "shell.execute_reply": "2026-05-28T22:17:31.423496Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -825,68 +929,70 @@
"{\n",
" \"events\": [\n",
" {\n",
- " \"id\": \"aev_0ec1954f-fc0e-4058-8f94-8acf5e5f2424\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:21:02.158Z\",\n",
+ " \"id\": \"aev_8b7358f2-323f-471c-9635-896ac57737e3\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:29.444Z\",\n",
" \"type\": \"artifact_upload\",\n",
" \"artifacts\": [\n",
" {\n",
- " \"artifact_id\": \"art_radial_spiky_era\",\n",
+ " \"artifact_id\": \"art_happy_plural_braid\",\n",
" \"filename\": \"revenue_chart.png\",\n",
" \"mime_type\": \"image/png\",\n",
" \"size_bytes\": 20604\n",
" }\n",
- " ]\n",
+ " ],\n",
+ " \"agent_upload_message\": \"A file has been uploaded to your workspace:\\n\\n- **revenue_chart.png** (image/png, 20.1 KiB) \\u2014 artifact_id: `art_happy_plural_braid`\\n\\nUse tools to access the full content if needed.\"\n",
" },\n",
" {\n",
- " \"id\": \"aev_49406fdc-7388-4986-a0dd-30f622c5ab70\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:21:02.703Z\",\n",
+ " \"id\": \"aev_802fc986-8926-4ab6-9255-986efbcb9f86\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:30.121Z\",\n",
" \"type\": \"tool_input\",\n",
- " \"tool_call_id\": \"call_KePfYESrPOTc4ShxWXnkxhcG\",\n",
+ " \"tool_call_id\": \"call_vMglyZsC1KEt8qflnlj6dPKO\",\n",
" \"tool_configuration_name\": \"image_read\",\n",
" \"tool_name\": \"image_read\",\n",
" \"tool_input\": {\n",
- " \"artifact_id\": \"art_radial_spiky_era\",\n",
- " \"detail\": \"auto\"\n",
+ " \"artifact_id\": \"art_happy_plural_braid\",\n",
+ " \"detail\": \"high\"\n",
" },\n",
- " \"tool_type\": \"image_read\"\n",
+ " \"tool_type\": \"image_read\",\n",
+ " \"argument_override_paths\": []\n",
" },\n",
" {\n",
- " \"id\": \"aev_7fea4803-2d42-4176-b66d-78781541f19f\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:21:02.876Z\",\n",
+ " \"id\": \"aev_eb4f2b7f-f6ba-4c72-982b-911b6675ccb5\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:30.187Z\",\n",
" \"type\": \"tool_output\",\n",
- " \"tool_call_id\": \"call_KePfYESrPOTc4ShxWXnkxhcG\",\n",
+ " \"tool_call_id\": \"call_vMglyZsC1KEt8qflnlj6dPKO\",\n",
" \"tool_configuration_name\": \"image_read\",\n",
" \"tool_name\": \"image_read\",\n",
" \"tool_output\": {\n",
- " \"id\": \"aev_66b99157-5778-49e9-9bc6-09c9a4958ee2\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:21:02.873Z\",\n",
+ " \"id\": \"aev_b324c79f-3b93-453d-b1de-0000e69426b5\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:30.184Z\",\n",
" \"type\": \"image_read\",\n",
- " \"artifact_id\": \"art_radial_spiky_era\",\n",
- " \"detail\": \"auto\"\n",
+ " \"artifact_id\": \"art_happy_plural_braid\",\n",
+ " \"detail\": \"high\"\n",
" },\n",
" \"error\": false\n",
" },\n",
" {\n",
- " \"id\": \"aev_66b99157-5778-49e9-9bc6-09c9a4958ee2\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:21:02.873Z\",\n",
+ " \"id\": \"aev_b324c79f-3b93-453d-b1de-0000e69426b5\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:30.184Z\",\n",
" \"type\": \"image_read\",\n",
- " \"artifact_id\": \"art_radial_spiky_era\",\n",
- " \"detail\": \"auto\"\n",
+ " \"artifact_id\": \"art_happy_plural_braid\",\n",
+ " \"detail\": \"high\"\n",
" },\n",
" {\n",
- " \"id\": \"aev_8327de67-3cce-4788-b1ab-daaaf5954b98\",\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\",\n",
- " \"created_at\": \"2026-05-06T13:21:06.075Z\",\n",
+ " \"id\": \"aev_0aa55270-09a3-4f4e-99c4-9d3a2647539a\",\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\",\n",
+ " \"created_at\": \"2026-05-28T22:17:31.425Z\",\n",
" \"type\": \"agent_output\",\n",
- " \"content\": \"The uploaded image is a bar chart titled \\\"Q3 2024 Revenue by Region ($M)\\\". It shows revenue distribution across three regions:\\n\\n- **North America**: $1.89 million\\n- **Europe**: $1.26 million\\n- **Asia Pacific**: $1.05 million\\n\\nThis visual representation aligns with the geographic revenue breakdown provided in the Q3 2024 Sales Report. North America leads in revenue, followed by Europe and Asia Pacific.\"\n",
+ " \"content\": \"The chart illustrates the Q3 2024 revenue distribution by region:\\n\\n- **North America**: $1.89M\\n- **Europe**: $1.26M\\n- **Asia Pacific**: $1.05M\\n\\nNorth America leads in revenue contribution, followed by Europe and Asia Pacific, aligning with the details from the sales report. This visualization effectively highlights the regional performance for the quarter.\"\n",
" }\n",
" ],\n",
- " \"session_key\": \"ase_artifact_demo_20260506-062049_8113\"\n",
+ " \"session_key\": \"ase_artifact_demo_20260528-151709_0f3c\"\n",
"}\n"
]
}
@@ -955,7 +1061,14 @@
"cell_type": "code",
"execution_count": 14,
"id": "cell-27",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:31.428036Z",
+ "iopub.status.busy": "2026-05-28T22:17:31.427487Z",
+ "iopub.status.idle": "2026-05-28T22:17:33.596974Z",
+ "shell.execute_reply": "2026-05-28T22:17:33.595742Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -964,7 +1077,13 @@
"User: I've uploaded a revenue chart. Can you describe what you see and provide any insights?\n",
"\n",
"================================================================================\n",
- "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Event: input_message\n",
@@ -973,24 +1092,21 @@
"\n",
"Agent Response:\n",
"\n",
- "The revenue chart for Q3 2024 displays the revenue generated from three regions:\n",
- "\n",
- "1. **North America**: Leading with $1.89 million.\n",
- "2. **Europe**: Following with $1.26 million.\n",
- "3. **Asia Pacific**: Generating $1.05 million.\n",
+ "The revenue chart for Q3 2024 shows the following regional breakdown:\n",
"\n",
- "### Insights:\n",
+ "### Revenue by Region\n",
+ "- **North America**: $1.89 million\n",
+ "- **Europe**: $1.26 million\n",
+ "- **Asia Pacific**: $1.05 million\n",
"\n",
- "- **North America** is the largest contributor to revenue, highlighting the region's strong market presence or customer base.\n",
- "- **Europe** also shows significant revenue, suggesting solid customer engagement or growth in that area.\n",
- "- **Asia Pacific** contributes a substantial portion, showing potential for expansion and increased market share.\n",
+ "### Insights\n",
+ "1. **North America** leads in revenue, contributing nearly half of the total for the quarter. This suggests a strong market presence and possibly higher sales activities or larger deals in that region.\n",
+ " \n",
+ "2. **Europe** follows, with a significant portion, indicating robust performance but with room for increased market penetration or upselling.\n",
"\n",
- "### Strategic Considerations:\n",
+ "3. **Asia Pacific** generates $1.05 million, showing it as a substantial market, but perhaps with potential for growth and expansion efforts.\n",
"\n",
- "- Consider strategies to further penetrate the North American market given its current success.\n",
- "- Evaluate growth opportunities in Europe and Asia Pacific to balance regional revenue streams and mitigate risks.\n",
- "\n",
- "This chart provides a clear visual of regional performance, reinforcing data from the Q3 sales report.\n"
+ "Overall, the chart underscores the importance of maintaining and expanding strong market strategies in these regions to sustain and enhance growth.\n"
]
}
],
@@ -1024,13 +1140,20 @@
"cell_type": "code",
"execution_count": 15,
"id": "cell-32",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:33.603018Z",
+ "iopub.status.busy": "2026-05-28T22:17:33.602109Z",
+ "iopub.status.idle": "2026-05-28T22:17:35.532677Z",
+ "shell.execute_reply": "2026-05-28T22:17:35.531896Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted agent: agt_document_analyst_c291\n",
+ "Deleted agent: agt_document_analyst_0cf2\n",
"Removed: /tmp/q3_sales_report.md\n",
"Removed: /tmp/revenue_chart.png\n"
]
diff --git a/notebooks/api-examples/7-lambda-tools-data-analysis.ipynb b/notebooks/api-examples/8-lambda-tools-data-analysis.ipynb
similarity index 76%
rename from notebooks/api-examples/7-lambda-tools-data-analysis.ipynb
rename to notebooks/api-examples/8-lambda-tools-data-analysis.ipynb
index 3d5c4ab..b9aa2f1 100644
--- a/notebooks/api-examples/7-lambda-tools-data-analysis.ipynb
+++ b/notebooks/api-examples/8-lambda-tools-data-analysis.ipynb
@@ -5,7 +5,7 @@
"id": "cell-0",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -49,13 +49,14 @@
"source": [
"## Getting Started\n",
"\n",
- "This notebook assumes you've completed Notebooks 1-6:\n",
+ "This notebook assumes you've completed Notebooks 1-7:\n",
"- Notebook 1: Created corpora\n",
"- Notebook 2: Ingested data\n",
- "- Notebook 3: Queried data\n",
- "- Notebook 4: Created agents and sessions\n",
- "- Notebook 5: Built multi-agent workflows with Lambda tools\n",
- "- Notebook 6: Worked with file artifacts\n",
+ "- Notebook 3: Deleted documents from a corpus\n",
+ "- Notebook 4: Queried data\n",
+ "- Notebook 5: Created agents and sessions\n",
+ "- Notebook 6: Built multi-agent workflows with sub-agents\n",
+ "- Notebook 7: Worked with file artifacts\n",
"\n",
"Now we'll create sophisticated Lambda tools for data analysis using NumPy and Pandas."
]
@@ -72,7 +73,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "cell-5",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:38.099491Z",
+ "iopub.status.busy": "2026-05-28T22:17:38.098975Z",
+ "iopub.status.idle": "2026-05-28T22:17:38.160496Z",
+ "shell.execute_reply": "2026-05-28T22:17:38.160236Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -108,7 +116,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "cell-6",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:38.162482Z",
+ "iopub.status.busy": "2026-05-28T22:17:38.162279Z",
+ "iopub.status.idle": "2026-05-28T22:17:38.165755Z",
+ "shell.execute_reply": "2026-05-28T22:17:38.165353Z"
+ }
+ },
"outputs": [],
"source": [
"# Load the shared helpers (delete_and_create_agent / delete_and_create_tool).\n",
@@ -155,13 +170,20 @@
"cell_type": "code",
"execution_count": 3,
"id": "cell-8",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:38.167106Z",
+ "iopub.status.busy": "2026-05-28T22:17:38.166992Z",
+ "iopub.status.idle": "2026-05-28T22:17:43.819644Z",
+ "shell.execute_reply": "2026-05-28T22:17:43.818364Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created tool 'statistical_analyzer' (id: tol_6330)\n"
+ "Created tool 'statistical_analyzer' (id: tol_6923)\n"
]
}
],
@@ -306,13 +328,20 @@
"cell_type": "code",
"execution_count": 4,
"id": "cell-10",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:43.824118Z",
+ "iopub.status.busy": "2026-05-28T22:17:43.823754Z",
+ "iopub.status.idle": "2026-05-28T22:17:45.682924Z",
+ "shell.execute_reply": "2026-05-28T22:17:45.681827Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created tool 'trend_analyzer' (id: tol_6331)\n"
+ "Created tool 'trend_analyzer' (id: tol_6924)\n"
]
}
],
@@ -473,13 +502,20 @@
"cell_type": "code",
"execution_count": 5,
"id": "cell-14",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:45.687535Z",
+ "iopub.status.busy": "2026-05-28T22:17:45.687063Z",
+ "iopub.status.idle": "2026-05-28T22:17:49.677101Z",
+ "shell.execute_reply": "2026-05-28T22:17:49.676452Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Created agent 'Data Analyst' (key: agt_data_analyst_d71a)\n"
+ "Created agent 'Data Analyst' (key: agt_data_analyst_a152)\n"
]
}
],
@@ -558,13 +594,20 @@
"cell_type": "code",
"execution_count": 6,
"id": "cell-16",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:49.682938Z",
+ "iopub.status.busy": "2026-05-28T22:17:49.682387Z",
+ "iopub.status.idle": "2026-05-28T22:17:49.821894Z",
+ "shell.execute_reply": "2026-05-28T22:17:49.820715Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Session Created: ase_data_analysis_demo_20260506-062133_91f1\n"
+ "Session Created: ase_data_analysis_demo_20260528-151749_83f6\n"
]
}
],
@@ -600,7 +643,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "cell-17",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:49.826862Z",
+ "iopub.status.busy": "2026-05-28T22:17:49.826089Z",
+ "iopub.status.idle": "2026-05-28T22:17:49.837262Z",
+ "shell.execute_reply": "2026-05-28T22:17:49.836581Z"
+ }
+ },
"outputs": [],
"source": [
"# Helper function to chat with the agent\n",
@@ -662,14 +712,27 @@
"cell_type": "code",
"execution_count": 8,
"id": "cell-19",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:17:49.840250Z",
+ "iopub.status.busy": "2026-05-28T22:17:49.839982Z",
+ "iopub.status.idle": "2026-05-28T22:18:06.497909Z",
+ "shell.execute_reply": "2026-05-28T22:18:06.496855Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"User: Analyzing sales data...\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Tool Called: statistical_analyzer\n",
@@ -684,47 +747,49 @@
"\n",
"Here's the analysis of your sales data for the three products across four regions:\n",
"\n",
- "### Basic Statistics for Sales and Profit Margins\n",
+ "### Basic Statistics for Sales and Profit Margins:\n",
"\n",
- "**Sales Statistics:**\n",
- "- **Count**: 12 observations\n",
- "- **Mean**: \\$14,916.67\n",
- "- **Standard Deviation**: \\$7,412.62\n",
- "- **Minimum**: \\$6,000\n",
- "- **25th Percentile**: \\$8,750\n",
- "- **Median (50th Percentile)**: \\$13,500\n",
- "- **75th Percentile**: \\$19,750\n",
- "- **Maximum**: \\$28,000\n",
+ "1. **Sales**\n",
+ " - **Mean**: $14,917 approximately\n",
+ " - **Standard Deviation**: $7,413\n",
+ " - **Minimum**: $6,000\n",
+ " - **25th Percentile** (Q1): $8,750\n",
+ " - **Median**: $13,500\n",
+ " - **75th Percentile** (Q3): $19,750\n",
+ " - **Maximum**: $28,000\n",
"\n",
- "**Profit Margin Statistics:**\n",
- "- **Count**: 12 observations\n",
- "- **Mean**: 0.2492\n",
- "- **Standard Deviation**: 0.0965\n",
- "- **Minimum**: 0.12\n",
- "- **25th Percentile**: 0.1725\n",
- "- **Median (50th Percentile)**: 0.235\n",
- "- **75th Percentile**: 0.3275\n",
- "- **Maximum**: 0.4\n",
+ "2. **Profit Margin**\n",
+ " - **Mean**: 0.249, or 24.9%\n",
+ " - **Standard Deviation**: 0.097\n",
+ " - **Minimum**: 0.12, or 12%\n",
+ " - **25th Percentile** (Q1): 0.1725, or 17.25%\n",
+ " - **Median**: 0.235, or 23.5%\n",
+ " - **75th Percentile** (Q3): 0.3275, or 32.75%\n",
+ " - **Maximum**: 0.4, or 40%\n",
"\n",
- "### Correlation Analysis\n",
+ "### Correlation Between Sales, Units, and Profit Margin:\n",
+ "\n",
+ "1. **Sales and Units**: Correlation of -0.3966\n",
+ " - There is a weak negative correlation, indicating that as sales increase, units sold might slightly decrease, but the relationship is not strong.\n",
"\n",
- "**Correlation between Sales, Units, and Profit Margin:**\n",
- "- **Sales and Units**: -0.40 (indicating a weak inverse relationship)\n",
- "- **Sales and Profit Margin**: 0.99 (indicating a very strong positive relationship)\n",
- "- **Units and Profit Margin**: -0.48 (indicating a moderate inverse relationship)\n",
+ "2. **Sales and Profit Margin**: Correlation of 0.9896\n",
+ " - A very strong positive correlation, suggesting that higher sales tend to be associated with higher profit margins.\n",
"\n",
- "### Insights\n",
+ "3. **Units and Profit Margin**: Correlation of -0.4829\n",
+ " - Moderately negative correlation, meaning that higher units sold might be associated with lower profit margins, but this is not definitive.\n",
"\n",
- "1. **Profit Margins and Sales:** \n",
- " - There is a very strong positive correlation between sales and profit margins. This suggests that as sales increase, profit margins tend to be higher. This might indicate that higher sales figures are associated with higher profitability.\n",
+ "### Insights and Actionable Points:\n",
"\n",
- "2. **Sales and Units:**\n",
- " - The negative correlation suggests that increases in the number of units sold do not necessarily correlate with higher sales value. This might imply pricing strategies or different product lines with varying cost per unit.\n",
+ "- **Strong Positive Relationship**: Increasing sales also increases profit margins, which is a key insight for strategic planning. Emphasizing strategies that boost sales can potentially lead to better profit outcomes.\n",
+ "- **Sales Units Dynamics**: It might be beneficial to explore why higher sales correlate with lower unit numbers. This might depend on pricing strategies or product types.\n",
+ "- **Profit Margin Management**: Consider investigating regions or products contributing to low or negative correlations to improve inventory or pricing strategies.\n",
"\n",
- "3. **Units and Profit Margin:**\n",
- " - The moderate negative correlation implies that selling more units is not associated with a proportional increase in the profit margin, perhaps due to economies of scale or discounts for bulk sales.\n",
+ "### Potential Follow-up Analyses:\n",
+ "- Analyze sales and profit margin trends over time to understand seasonal effects.\n",
+ "- Segment analysis by product type or region to identify specific drivers of sales.\n",
+ "- Perform a deeper dive into pricing strategies or marketing efforts.\n",
"\n",
- "These insights could be used to evaluate pricing or sales strategies across different regions or product lines. If you have any further data or need additional analysis, feel free to ask!\n"
+ "This combination of statistical insights can help inform strategic decisions regarding sales growth, pricing, and market expansion.\n"
]
}
],
@@ -774,14 +839,27 @@
"cell_type": "code",
"execution_count": 9,
"id": "cell-21",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:18:06.503352Z",
+ "iopub.status.busy": "2026-05-28T22:18:06.502943Z",
+ "iopub.status.idle": "2026-05-28T22:18:16.984566Z",
+ "shell.execute_reply": "2026-05-28T22:18:16.983964Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"User: Analyzing revenue trends...\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Tool Called: trend_analyzer\n",
@@ -791,29 +869,31 @@
"\n",
"Agent Response:\n",
"\n",
- "Here's the analysis of your revenue trend for the year:\n",
+ "Here's the analysis of your company's revenue trend for the year 2024:\n",
"\n",
- "### Revenue Trend Analysis\n",
+ "### Revenue Trend:\n",
"\n",
- "1. **Trend Direction:**\n",
- " - The revenue is trending **upward**. The overall analysis indicates a positive trend with a high R-squared value of 0.9334, which suggests that the trend direction is quite reliable.\n",
+ "1. **Trend Direction**: \n",
+ " - The revenue is trending **upward**. This is indicated by a positive slope (5,860) in the linear regression trend line, which translates to a roughly 4.63% increase in revenue each period on average.\n",
"\n",
- "2. **Growth Rate:**\n",
- " - **Period-over-period growth**: The latest month (December) shows an increase of approximately 8.39% from the previous month. The average period-over-period growth rate for the year is about 5.02%.\n",
- " - **Total growth**: Over the year, revenue has increased by 68% from the starting value.\n",
- " - The trend's **slope per period percentage** is 4.63%, indicating a steady growth rate per time period considered.\n",
+ "2. **Growth Rate**:\n",
+ " - **Period-over-Period Growth (Latest)**: Revenue increased by approximately 8.39% in the most recent period.\n",
+ " - **Average Period-over-Period Growth**: Throughout the year, the average growth rate was about 5.02%.\n",
+ " - **Total Growth from Start to End**: There was a total growth of 68% from the start of the year (January) to the end of the year (December).\n",
"\n",
- "3. **Moving Averages:**\n",
- " - **3-Period Moving Average**: The most recent 3-period average revenue is \\$153,666.67.\n",
- " - **7-Period Moving Average**: The most recent 7-period average revenue is \\$140,714.29.\n",
+ "3. **Moving Averages**:\n",
+ " - **3-Month Moving Average**: The latest value is $153,667, showing a smoothing of revenue over the last three months.\n",
+ " - **7-Month Moving Average**: The latest value is $140,714, highlighting underlying trends over longer periods.\n",
"\n",
- "### Insights\n",
+ "### Additional Insights:\n",
+ "- **Strong Correlation**: The R-squared value of 0.9334 indicates a very strong linear relationship between time and revenue, signifying dependable upward growth throughout the year.\n",
+ "- **Fluctuation**: The revenue reached a minimum of $98,000 in March and a maximum of $168,000 in December, reflecting a robust end to the year.\n",
"\n",
- "- **Steady Growth**: The analysis shows a stable upward trajectory in revenue, with monthly gains indicating effective strategies that might be contributing to this consistent growth.\n",
- "- **Strong End to the Year**: The latter months, particularly November (\\$155,000) and December (\\$168,000), show significant increases, suggesting strong sales performance towards the end of the year.\n",
- "- **Planning for Sustained Growth**: The positive trend and overall growth rate may indicate opportunities to further capitalize on successful strategies or examine successful practices during high-performing months for future growth.\n",
+ "### Actionable Points:\n",
+ "- **Sustain Growth Strategies**: Maintain or enhance strategies contributing to steady growth, especially those boosting revenue significantly towards the end of the year.\n",
+ "- **Focus on Last Quarter**: Since the growth rate peaked towards the year-end, analyze and replicate successful strategies from this period in the upcoming year.\n",
"\n",
- "If further analysis or data insights are required, or if you have more data to explore, feel free to reach out!\n"
+ "This analysis suggests positive revenue momentum which, if sustained, could position your organization well in the coming year. Exploring further detailed segmentation by product or department could reveal more actionable dimensions.\n"
]
}
],
@@ -863,14 +943,27 @@
"cell_type": "code",
"execution_count": 10,
"id": "cell-25",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:18:16.988112Z",
+ "iopub.status.busy": "2026-05-28T22:18:16.987859Z",
+ "iopub.status.idle": "2026-05-28T22:18:36.708379Z",
+ "shell.execute_reply": "2026-05-28T22:18:36.707460Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"User: Comprehensive business analysis...\n",
- "================================================================================\n",
+ "================================================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"------ Agent Events ------\n",
"Tool Called: statistical_analyzer\n",
@@ -886,66 +979,63 @@
"\n",
"Agent Response:\n",
"\n",
- "Here is a comprehensive analysis of your quarterly business performance:\n",
+ "Here's a comprehensive analysis of your quarterly business performance:\n",
"\n",
"### Statistical Summary\n",
"\n",
- "**Revenue:**\n",
- "- **Count**: 8 quarters\n",
- "- **Mean**: \\$601,250\n",
- "- **Standard Deviation**: \\$119,933.02\n",
- "- **Minimum**: \\$450,000\n",
- "- **25th Percentile**: \\$510,000\n",
- "- **Median (50th Percentile)**: \\$595,000\n",
- "- **75th Percentile**: \\$667,500\n",
- "- **Maximum**: \\$800,000\n",
+ "1. **Revenue**\n",
+ " - **Mean**: $601,250\n",
+ " - **Standard Deviation**: $119,933\n",
+ " - **Min**: $450,000\n",
+ " - **25th Percentile (Q1)**: $510,000\n",
+ " - **Median**: $595,000\n",
+ " - **75th Percentile (Q3)**: $667,500\n",
+ " - **Max**: $800,000\n",
"\n",
- "**Costs:**\n",
- "- **Count**: 8 quarters\n",
- "- **Mean**: \\$394,375\n",
- "- **Standard Deviation**: \\$55,255.09\n",
- "- **Minimum**: \\$320,000\n",
- "- **25th Percentile**: \\$353,750\n",
- "- **Median (50th Percentile)**: \\$395,000\n",
- "- **75th Percentile**: \\$427,500\n",
- "- **Maximum**: \\$480,000\n",
+ "2. **Costs**\n",
+ " - **Mean**: $394,375\n",
+ " - **Standard Deviation**: $55,255\n",
+ " - **Min**: $320,000\n",
+ " - **25th Percentile (Q1)**: $353,750\n",
+ " - **Median**: $395,000\n",
+ " - **75th Percentile (Q3)**: $427,500\n",
+ " - **Max**: $480,000\n",
"\n",
- "**Customers:**\n",
- "- **Count**: 8 quarters\n",
- "- **Mean**: 1,818.75\n",
- "- **Standard Deviation**: 487.66\n",
- "- **Minimum**: 1,200\n",
- "- **25th Percentile**: 1,462.5\n",
- "- **Median (50th Percentile)**: 1,775\n",
- "- **75th Percentile**: 2,075\n",
- "- **Maximum**: 2,650\n",
+ "3. **Customers**\n",
+ " - **Mean**: 1,818.75\n",
+ " - **Standard Deviation**: 487.66\n",
+ " - **Min**: 1,200\n",
+ " - **25th Percentile (Q1)**: 1,462.5\n",
+ " - **Median**: 1,775\n",
+ " - **75th Percentile (Q3)**: 2,075\n",
+ " - **Max**: 2,650\n",
"\n",
"### Correlation Analysis\n",
"\n",
- "- **Correlation between Revenue and Customers**: 0.9986, indicating an extremely strong positive correlation. This suggests that increases in customer numbers are closely linked to revenue increases.\n",
+ "- **Correlation between Revenue and Customers**: 0.9986\n",
+ " - There's a very strong positive correlation, indicating that as the number of customers increases, revenue also tends to increase correspondingly. This suggests effective monetization of customer growth.\n",
"\n",
- "### Trend Analysis on Revenue\n",
+ "### Revenue Trend Analysis\n",
"\n",
- "1. **Trend Direction:**\n",
- " - The revenue trend is **upward**, with a strong R-squared value of 0.9506, indicating a consistent trend.\n",
+ "1. **Trend Direction**: \n",
+ " - The revenue shows an **upward trend** with a strong linear relationship (R-squared: 0.9506), indicating reliable growth over the analyzed periods.\n",
"\n",
- "2. **Growth Rate:**\n",
- " - **Period-over-period growth**: The most recent quarter shows an 11.11% increase from the previous quarter.\n",
- " - **Average growth rate**: Approximately 8.76% per quarter.\n",
- " - **Total growth over the period**: 77.78% from the initial value in 2023-Q1.\n",
- " - **Slope per period**: 7.94%, reflecting a notable increase in revenue each quarter.\n",
+ "2. **Growth Rates**:\n",
+ " - **Period-over-Period Growth (Latest)**: Revenue increased by 11.11% from the last quarter.\n",
+ " - **Average Period-over-Period Growth**: Around 8.76% on average per period.\n",
+ " - **Total Growth from Start to End**: Revenue grew by 77.78% from the start (Q1 2023) to the end (Q4 2024).\n",
"\n",
- "3. **Moving Averages:**\n",
- " - **3-Period Moving Average**: The latest value is \\$723,333.33.\n",
- " - **7-Period Moving Average**: The latest value is \\$622,857.14.\n",
+ "3. **Moving Averages**:\n",
+ " - **3-Period Moving Average**: Latest value is $723,333, smoothing out short-term fluctuations.\n",
+ " - **7-Period Moving Average**: Latest value is $622,857, providing a longer-term perspective.\n",
"\n",
- "### Insights\n",
+ "### Insights and Recommendations\n",
"\n",
- "- **Revenue and Customer Growth**: The high correlation between revenue and customer numbers suggests that effective customer acquisition and retention strategies are directly boosting revenue.\n",
- "- **Consistent Upward Trend**: The revenue shows a strong upward trajectory, indicating effective business strategies and possibly reflecting market expansion or product/service improvements.\n",
- "- **Cost Efficiency**: While costs are also increasing, the growth in revenue outpaces cost increases, suggesting improved profit margins.\n",
+ "- **Sustain Customer Acquisition Strategies**: The close correlation between customers and revenue suggests strong customer acquisition or retention strategies. Focus on further leveraging these tactics.\n",
+ "- **Cost Management**: Despite increasing revenues, ensure that cost management practices are in place to protect margins, especially as costs also show an upward trajectory.\n",
+ "- **Prepare for Growth**: The upward trend suggests scaling opportunities. Consider investments in capacity or marketing to sustain and accelerate growth.\n",
"\n",
- "These insights highlight robust business performance, aligning with a growing customer base and strategic financial management. If further exploration or specific strategy development is needed, feel free to ask!\n"
+ "This detailed analysis points towards a strong growth trajectory, with increasing revenues closely linked to customer increases, supported by effective operational strategies. Regular monitoring and strategic planning could further enhance performance in future periods.\n"
]
}
],
@@ -993,15 +1083,28 @@
"cell_type": "code",
"execution_count": 11,
"id": "cell-28",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:18:36.712419Z",
+ "iopub.status.busy": "2026-05-28T22:18:36.711942Z",
+ "iopub.status.idle": "2026-05-28T22:18:39.239416Z",
+ "shell.execute_reply": "2026-05-28T22:18:39.238658Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Deleted agent: agt_data_analyst_d71a\n",
- "Deleted tool: statistical_analyzer (tol_6330)\n",
- "Deleted tool: trend_analyzer (tol_6331)\n"
+ "Deleted agent: agt_data_analyst_a152\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleted tool: statistical_analyzer (tol_6923)\n",
+ "Deleted tool: trend_analyzer (tol_6924)\n"
]
}
],
diff --git a/notebooks/api-examples/8-reranker-instructions.ipynb b/notebooks/api-examples/9-reranker-instructions.ipynb
similarity index 64%
rename from notebooks/api-examples/8-reranker-instructions.ipynb
rename to notebooks/api-examples/9-reranker-instructions.ipynb
index b31c01c..3d3232a 100644
--- a/notebooks/api-examples/8-reranker-instructions.ipynb
+++ b/notebooks/api-examples/9-reranker-instructions.ipynb
@@ -5,7 +5,7 @@
"id": "colab-badge",
"metadata": {},
"source": [
- "
"
+ "
"
]
},
{
@@ -49,7 +49,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "setup",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:18:41.782759Z",
+ "iopub.status.busy": "2026-05-28T22:18:41.782077Z",
+ "iopub.status.idle": "2026-05-28T22:18:41.850166Z",
+ "shell.execute_reply": "2026-05-28T22:18:41.849943Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -125,7 +132,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "example1-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:18:41.851429Z",
+ "iopub.status.busy": "2026-05-28T22:18:41.851327Z",
+ "iopub.status.idle": "2026-05-28T22:18:49.298413Z",
+ "shell.execute_reply": "2026-05-28T22:18:49.297551Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -133,16 +147,22 @@
"text": [
"============================================================\n",
"BASELINE: qwen3-reranker without instructions\n",
- "============================================================\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"=== Generated Summary ===\n",
- "Reranking improves search result quality by refining and reordering search results after their initial retrieval. This process enhances the relevance of the results by using various reranker types, such as neural models, to optimize precision and add diversity or custom business logic. By configuring rerankers, the most relevant and business-critical results are prioritized, ensuring they appear at the top of the search results. This is particularly beneficial in applications requiring advanced neural ranking capabilities, such as multilingual content and real-time response generation, where rerankers can maintain improved result quality as each data chunk is received [1], [5], [6].\n",
+ "Reranking improves search result quality by refining and reordering the results after their initial retrieval. This process enhances the relevance of the results by ensuring that the most pertinent and business-critical results appear at the top. Rerankers can be configured to optimize result quality for different use cases, such as improving precision with neural models, adding diversity, or incorporating custom business logic. They can also provide multilingual support and advanced neural ranking capabilities, making them ideal for applications requiring high relevance and precision [1], [2], [4].\n",
"\n",
- "=== Factual Consistency Score: 0.48828125 ===\n",
+ "=== Factual Consistency Score: 0.6875 ===\n",
"\n",
"=== Top Search Results ===\n",
"\n",
- "--- Result 1 (score: 0.9991) ---\n",
+ "--- Result 1 (score: 0.9990) ---\n",
"Document: docs-vectara-com-docs-sdk-python-rerankers\n",
"Title: Rerankers\n",
"Text: Rerankers enhance the relevance of search results by refining and reordering them\n",
@@ -155,24 +175,24 @@
"Text: This guide assumes you have a corpus calledmy-docs. If you haven't created a corpus yet, follow\n",
"theQuick Startguide to set up your first corpus. Improve result ordering in a query by specifying a rera...\n",
"\n",
- "--- Result 3 (score: 0.9060) ---\n",
+ "--- Result 3 (score: 0.9059) ---\n",
"Document: docs-vectara-com-docs-sdk-python-rerankers\n",
"Title: Rerankers\n",
"Text: Rerankers enhance the relevance of search results by refining and reordering them\n",
"after initial retrieval. The Vectara Python SDK enables you to apply various\n",
"reranker types in queries to optimize res...\n",
"\n",
- "--- Result 4 (score: 0.9009) ---\n",
+ "--- Result 4 (score: 0.9011) ---\n",
+ "Document: docs-vectara-com-docs-rest-api-query-corpus\n",
+ "Title: Advanced Single Corpus Query\n",
+ "Text: You can only use sentences before/after OR characters before/after, but not both. Code example: {\"context_configuration\":{\"sentences_before\":2,\"sentences_after\":2,\"start_tag\":\"\",\"end_tag\":\"\",\"end_tag\":\" Same query, same corpora, same retrieval layer. The instruction\n",
" alone shifts which content rises to the top of the results.\n"
@@ -529,7 +571,14 @@
"cell_type": "code",
"execution_count": 5,
"id": "example3-code",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:12.716493Z",
+ "iopub.status.busy": "2026-05-28T22:19:12.716192Z",
+ "iopub.status.idle": "2026-05-28T22:19:26.425715Z",
+ "shell.execute_reply": "2026-05-28T22:19:26.424494Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -537,45 +586,57 @@
"text": [
"============================================================\n",
"BASELINE — no instructions (vector retrieval saturated with docs)\n",
- "============================================================\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"=== Generated Summary ===\n",
- "Chunking strategies for long documents involve dividing the text into manageable parts to optimize retrieval and maintain contextual coherence. Two primary strategies are commonly used:\n",
+ "Chunking strategies for long documents involve dividing the text into manageable parts for better processing and retrieval. Common strategies include:\n",
"\n",
- "1. **Sentence-based Chunking**: This strategy creates one chunk per sentence, which is the default method. It provides optimal retrieval accuracy for most datasets by maintaining the semantic integrity of each sentence [2], [3].\n",
+ "1. **Sentence Chunking**: This strategy creates one chunk per sentence, which is the default method. It provides optimal retrieval accuracy for most datasets [1], [6].\n",
"\n",
- "2. **Max Characters-based Chunking**: This strategy creates larger chunks up to a specified character limit (max_chars_per_chunk). It balances retrieval speed with contextual coherence, making it suitable for performance-tuned ingestion of larger documents [1], [2].\n",
+ "2. **Max-Chars Chunking**: This approach accumulates sentences into a chunk until a specified character limit is reached. If a single sentence exceeds this limit, it is split across chunks. This method balances retrieval speed with contextual coherence [1], [6].\n",
"\n",
- "These strategies allow for flexible document processing, ensuring that the text is divided in a way that supports efficient data retrieval and processing [5].\n",
+ "3. **Recursive Chunking**: This involves splitting the document by paragraph, then sentence, and finally word, until a size target is met [3].\n",
"\n",
- "=== Factual Consistency Score: 0.703125 ===\n",
+ "4. **Semantic Chunking**: This strategy splits the document where sentence-level similarity drops, maintaining semantic integrity [3].\n",
"\n",
- "=== Top Search Results ===\n",
+ "5. **Sliding-Window Chunking**: This method uses fixed-size chunks with token overlap between adjacent chunks to ensure context is preserved [3].\n",
"\n",
- "--- Result 1 (score: 0.9997) ---\n",
- "Document: docs-vectara-com-docs-rest-api-create-corpus-document\n",
- "Title: Add a document to a corpus\n",
- "Text: For larger documents or performance-tuned ingestion, you can explicitly set achunking_strategy: sentence_chunking_strategy— creates one chunk per sentence (default). max_chars_chunking_strategy— creat...\n",
+ "6. **Hierarchical Chunking**: This involves indexing content at two granularities for context and precision [3].\n",
"\n",
- "--- Result 2 (score: 0.9053) ---\n",
- "Document: docs-vectara-com-docs-rest-api-create-corpus-document\n",
- "Title: Add a document to a corpus\n",
- "Text: Core documents are designed for advanced use cases such as precise chunk-level optimization or experimental corpus structures, and applications where metadata-driven retrieval or ranking must be expli...\n",
+ "=== Factual Consistency Score: 0.68359375 ===\n",
"\n",
- "--- Result 3 (score: 0.9035) ---\n",
- "Document: docs-vectara-com-docs-rest-api-create-corpus-document\n",
- "Title: Add a document to a corpus\n",
- "Text: By default, Vectara usessentence-based chunking, which provides optimal retrieval accuracy for most datasets. For larger documents or performance-tuned ingestion, you can explicitly set achunking_stra...\n",
+ "=== Top Search Results ===\n",
"\n",
- "--- Result 4 (score: 0.8879) ---\n",
+ "--- Result 1 (score: 0.9997) ---\n",
"Document: docs-vectara-com-docs-rest-api-create-corpus-document\n",
"Title: Add a document to a corpus\n",
- "Text: A list ofdocument_parts, where each part includestext, optionalcontext,metadata, andcustom_dimensions. Optionaltablesandimages, allowing you to represent complex structured data like spreadsheets or c...\n",
+ "Text: For larger documents or performance-tuned ingestion, you can explicitly set achunking_strategy: sentence_chunking_strategy— creates one chunk per sentence (default). max_chars_chunking_strategy— creat...\n",
"\n",
- "--- Result 5 (score: 0.8852) ---\n",
- "Document: docs-vectara-com-docs-rest-api-create-corpus-document\n",
- "Title: Add a document to a corpus\n",
- "Text: An array ofsections, each with its own title, text, and optional nested sections, tables, or images. Optionalcustom_dimensionsthat can influence ranking during search. When indexed, Vectara partitions...\n"
+ "--- Result 2 (score: 0.9079) ---\n",
+ "Document: docs-vectara-com-docs-build-chunking-strategies\n",
+ "Title: Chunking strategies\n",
+ "Text: Amax-chars chunking strategyaccumulates sentences into a part until the part reaches a character limit specified. When a single sentence exceeds the limit, the platform splits that sentence across par...\n",
+ "\n",
+ "--- Result 3 (score: 0.9073) ---\n",
+ "Document: docs-vectara-com-docs-build-chunking-strategies\n",
+ "Title: Chunking strategies\n",
+ "Text: Sentence chunking and fixed-size character chunking are two of the most widely used approaches in RAG systems. Other strategies common in the broader ecosystem include: Recursive chunking- Splitting b...\n",
+ "\n",
+ "--- Result 4 (score: 0.9069) ---\n",
+ "Document: docs-vectara-com-docs-build-chunking-strategies\n",
+ "Title: Chunking strategies\n",
+ "Text: Vectara applies a chunking strategy at ingest time, before embedding. The strategy set on an upload or indexing request determines the part boundaries for every document in that request. Configuring c...\n",
+ "\n",
+ "--- Result 5 (score: 0.9050) ---\n",
+ "Document: docs-vectara-com-docs-build-chunking-strategies\n",
+ "Title: Chunking strategies\n",
+ "Text: Configuring chunking with thechunking_strategyfield accepts one of two strategy types: Asentence chunking strategycreates one part per sentence. This is the default whenchunking_strategyis not set. Am...\n"
]
}
],
@@ -627,7 +688,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "example3-with-glossary",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:26.430757Z",
+ "iopub.status.busy": "2026-05-28T22:19:26.430334Z",
+ "iopub.status.idle": "2026-05-28T22:19:36.454675Z",
+ "shell.execute_reply": "2026-05-28T22:19:36.453119Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -635,12 +703,30 @@
"text": [
"============================================================\n",
"WITH RESEARCHER INSTRUCTION — academic chunking papers boosted\n",
- "============================================================\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
"\n",
"=== Generated Summary ===\n",
- "Chunking strategies for long documents involve breaking down the document into manageable parts to improve retrieval and processing efficiency. One common strategy is sentence-level chunking, where each sentence forms a chunk. This is suitable for general use but may require additional context for very short or fragmentary parts like FAQ items or captions [6]. Another approach is to create larger chunks based on a specified character limit, balancing retrieval speed with contextual coherence [4]. Additionally, documents can be split into disjoint chunks of a fixed word count, such as 100-word chunks, which can then be indexed for fast retrieval [7]. These strategies help in preserving the document's structure and meaning while enabling efficient querying and retrieval.\n",
+ "Chunking strategies for long documents include several approaches to effectively manage and process text. Some common strategies are:\n",
"\n",
- "=== Factual Consistency Score: 0.93359375 ===\n",
+ "1. **Recursive Chunking**: This involves splitting the document by paragraph, then sentence, and finally word, until a desired size is achieved [3].\n",
+ "\n",
+ "2. **Semantic Chunking**: This method splits the document where there is a drop in sentence-level similarity, ensuring that each chunk maintains semantic coherence [3].\n",
+ "\n",
+ "3. **Sliding-Window Chunking**: This approach creates fixed-size chunks with overlapping tokens between adjacent chunks, which helps in maintaining context across chunks [3].\n",
+ "\n",
+ "4. **Hierarchical Chunking**: This strategy indexes the same content at different granularities, providing both context and precision [3].\n",
+ "\n",
+ "5. **Max-Chars Chunking**: This accumulates sentences up to a character limit, balancing context richness against the risk of diluting the meaning by spanning multiple topics [6].\n",
+ "\n",
+ "These strategies help in managing long documents by breaking them into manageable parts while preserving the context and meaning necessary for effective retrieval and processing.\n",
+ "\n",
+ "=== Factual Consistency Score: 0.80078125 ===\n",
"\n",
"=== Top Search Results ===\n",
"\n",
@@ -651,30 +737,27 @@
" Jonathan Berant. Coarse-to-fine question answering for long documents. In Proceedings of the\n",
" 5...\n",
"\n",
- "--- Result 2 (score: 0.8046) ---\n",
+ "--- Result 2 (score: 0.8056) ---\n",
"Document: rag-retrieval-augmented-generation.pdf\n",
"Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks\n",
"Text: URL\n",
"https://www.aclweb.org/anthology/P17-1171. [6] Eunsol Choi, Daniel Hewlett, Jakob Uszkoreit, Illia Polosukhin, Alexandre Lacoste, and\n",
" Jonathan Berant. Coarse-to-fine question answering for ...\n",
"\n",
- "--- Result 3 (score: 0.5197) ---\n",
- "Document: docs-vectara-com-docs-build-data-ingestion\n",
- "Title: Data ingestion\n",
- "Text: Custom metadata extraction. Flexible chunking strategies. Munging files into a structured data format helps preserve relationships\n",
- "between bits of data, retains special meaning of specific data types,...\n",
+ "--- Result 3 (score: 0.7675) ---\n",
+ "Document: docs-vectara-com-docs-build-chunking-strategies\n",
+ "Title: Chunking strategies\n",
+ "Text: Sentence chunking and fixed-size character chunking are two of the most widely used approaches in RAG systems. Other strategies common in the broader ecosystem include: Recursive chunking- Splitting b...\n",
"\n",
- "--- Result 4 (score: 0.4720) ---\n",
- "Document: docs-vectara-com-docs-rest-api-create-corpus-document\n",
- "Title: Add a document to a corpus\n",
- "Text: For larger documents or performance-tuned ingestion, you can explicitly set achunking_strategy: sentence_chunking_strategy— creates one chunk per sentence (default). max_chars_chunking_strategy— creat...\n",
+ "--- Result 4 (score: 0.7225) ---\n",
+ "Document: docs-vectara-com-docs-build-chunking-strategies\n",
+ "Title: Chunking strategies\n",
+ "Text: Recursive chunking- Splitting by paragraph, then sentence, then word, until a size target is met Semantic chunking- Splitting where sentence-level similarity drops Sliding-window chunking- Fixed-size ...\n",
"\n",
- "--- Result 5 (score: 0.2818) ---\n",
- "Document: gpt3-language-models.pdf\n",
- "Title: Language Models are Few-Shot Learners\n",
- "Text: ArXiv,\n",
- " abs/1803.05457, 2018. [CGRS19] Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. Generating long sequences with sparse\n",
- " transformers, 2019. ...\n"
+ "--- Result 5 (score: 0.6941) ---\n",
+ "Document: docs-vectara-com-docs-platform-architecture-platform-stack\n",
+ "Title: The platform stack\n",
+ "Text: A six-stage pipeline, tunable at every stage. Not a one-shot vector lookup. Code example: Documents → SmartChunk → Boomerang → Hybrid (BM25 + dense + filters)→ Slingshot reranker → Citations → Generat...\n"
]
}
],
@@ -690,7 +773,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "example3-compare",
- "metadata": {},
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-28T22:19:36.459317Z",
+ "iopub.status.busy": "2026-05-28T22:19:36.458852Z",
+ "iopub.status.idle": "2026-05-28T22:19:36.468306Z",
+ "shell.execute_reply": "2026-05-28T22:19:36.467771Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -701,19 +791,19 @@
"\n",
"BASELINE papers/docs = 0/5\n",
" 1. [docs ] docs-vectara-com-docs-rest-api-create-corpus-document (score: 0.9997)\n",
- " 2. [docs ] docs-vectara-com-docs-rest-api-create-corpus-document (score: 0.9053)\n",
- " 3. [docs ] docs-vectara-com-docs-rest-api-create-corpus-document (score: 0.9035)\n",
- " 4. [docs ] docs-vectara-com-docs-rest-api-create-corpus-document (score: 0.8879)\n",
- " 5. [docs ] docs-vectara-com-docs-rest-api-create-corpus-document (score: 0.8852)\n",
+ " 2. [docs ] docs-vectara-com-docs-build-chunking-strategies (score: 0.9079)\n",
+ " 3. [docs ] docs-vectara-com-docs-build-chunking-strategies (score: 0.9073)\n",
+ " 4. [docs ] docs-vectara-com-docs-build-chunking-strategies (score: 0.9069)\n",
+ " 5. [docs ] docs-vectara-com-docs-build-chunking-strategies (score: 0.9050)\n",
"\n",
- "WITH RESEARCHER papers/docs = 3/2\n",
+ "WITH RESEARCHER papers/docs = 2/3\n",
" 1. [paper] rag-retrieval-augmented-generation.pdf (score: 0.9520)\n",
- " 2. [paper] rag-retrieval-augmented-generation.pdf (score: 0.8046)\n",
- " 3. [docs ] docs-vectara-com-docs-build-data-ingestion (score: 0.5197)\n",
- " 4. [docs ] docs-vectara-com-docs-rest-api-create-corpus-document (score: 0.4720)\n",
- " 5. [paper] gpt3-language-models.pdf (score: 0.2818)\n",
+ " 2. [paper] rag-retrieval-augmented-generation.pdf (score: 0.8056)\n",
+ " 3. [docs ] docs-vectara-com-docs-build-chunking-strategies (score: 0.7675)\n",
+ " 4. [docs ] docs-vectara-com-docs-build-chunking-strategies (score: 0.7225)\n",
+ " 5. [docs ] docs-vectara-com-docs-platform-architecture-platform-stack (score: 0.6941)\n",
"\n",
- "Top-5 churn: 3 document(s) entered, 0 left.\n",
+ "Top-5 churn: 2 document(s) entered, 1 left.\n",
"\n",
"-> Without instructions, vector retrieval never surfaces research papers\n",
" for this query — Vectara's chunking docs dominate the top-5. A single\n",
diff --git a/notebooks/api-examples/README.md b/notebooks/api-examples/README.md
index b8b5a23..1228631 100644
--- a/notebooks/api-examples/README.md
+++ b/notebooks/api-examples/README.md
@@ -1,6 +1,6 @@
# Vectara API Tutorial Series
-This tutorial series provides a comprehensive, hands-on introduction to building RAG (Retrieval-Augmented Generation) applications using Vectara's REST API. Through thirteen progressive notebooks, you'll learn to create corpora, ingest data, query information, build intelligent AI agents, orchestrate multi-agent workflows, work with file artifacts, create data analysis tools with NumPy and Pandas, use reranker instructions for domain-specific relevance tuning, constrain agent output with JSON schemas and multi-step flows, automate agents on cron or interval schedules, let agents call any REST API — public or authenticated, read or write — with the `web_get` tool, use **agent skills** to load specialist instructions on demand, and drive an agent through deterministic multi-phase pipelines using **agent steps**.
+This tutorial series provides a comprehensive, hands-on introduction to building RAG (Retrieval-Augmented Generation) applications using Vectara's REST API. Through fourteen progressive notebooks, you'll learn to create corpora, ingest data, delete documents, query information, build intelligent AI agents, orchestrate multi-agent workflows, work with file artifacts, create data analysis tools with NumPy and Pandas, use reranker instructions for domain-specific relevance tuning, constrain agent output with JSON schemas and multi-step flows, automate agents on cron or interval schedules, let agents call any REST API — public or authenticated, read or write — with the `web_get` tool, use **agent skills** to load specialist instructions on demand, and drive an agent through deterministic multi-phase pipelines using **agent steps**.
## About Vectara
@@ -78,7 +78,30 @@ Two corpora with different purposes:
---
-### [Notebook 3: Query API](3-query-api.ipynb)
+### [Notebook 3: Deleting Documents](3-document-deletion.ipynb)
+
+**What you'll learn:**
+- Delete a single document by its ID
+- Bulk delete documents by metadata filter (`metadata_filter`) or by a list of document IDs
+- Delete all documents at once by resetting a corpus
+- List documents (optionally filtered by metadata) to verify deletions
+
+**What you'll build:**
+A self-contained walkthrough on its own `tutorial-document-deletion` corpus:
+1. Create the corpus and add six small documents (four with `category`/`year` metadata, two without)
+2. Delete one document by ID
+3. Bulk delete every `finance` document with a metadata filter
+4. Reset the corpus to remove everything, then optionally delete the corpus
+
+**Key concepts:**
+- **Single-document delete**: `DELETE /v2/corpora/{corpus_key}/documents/{document_id}`
+- **Bulk delete**: `DELETE /v2/corpora/{corpus_key}/documents` with `metadata_filter` and/or `document_ids` (async by default; pass `async=false` to wait for results)
+- **Corpus reset**: `POST /v2/corpora/{corpus_key}/reset` empties a corpus but keeps its configuration
+- **Self-contained**: creates and owns its corpus, so it needs only a `VECTARA_API_KEY` and doesn't affect the other notebooks
+
+---
+
+### [Notebook 4: Query API](4-query-api.ipynb)
**What you'll learn:**
- Execute basic queries with hybrid search
@@ -134,7 +157,7 @@ Stream generated responses in real-time using Server-Sent Events for better UX.
---
-### [Notebook 4: Agent API](4-agent-api.ipynb)
+### [Notebook 5: Agent API](5-agent-api.ipynb)
**What you'll learn:**
- Create AI agents with custom instructions
@@ -194,7 +217,7 @@ Agent: [Provides concrete example while maintaining context]
---
-### [Notebook 5: Sub-Agents](5-sub-agents.ipynb)
+### [Notebook 6: Sub-Agents](6-sub-agents.ipynb)
**What you'll learn:**
- Create specialized sub-agents for domain-specific tasks
@@ -258,7 +281,7 @@ orchestrator_config = {
---
-### [Notebook 6: Artifacts](6-artifacts.ipynb)
+### [Notebook 7: Artifacts](7-artifacts.ipynb)
**What you'll learn:**
- Upload files (PDFs, images, documents) to agent sessions
@@ -281,7 +304,7 @@ A **Document Analyst** agent that can:
---
-### [Notebook 7: Lambda Tools for Data Analysis](7-lambda-tools-data-analysis.ipynb)
+### [Notebook 8: Lambda Tools for Data Analysis](8-lambda-tools-data-analysis.ipynb)
**What you'll learn:**
- Create Lambda tools that use NumPy and Pandas for data analysis
@@ -329,7 +352,7 @@ def process(data: str, columns: str = "", operations: str = "describe") -> dict:
---
-### [Notebook 8: Reranker Instructions](8-reranker-instructions.ipynb)
+### [Notebook 9: Reranker Instructions](9-reranker-instructions.ipynb)
**What you'll learn:**
- Use reranker instructions with `qwen3-reranker` to guide relevance scoring
@@ -351,7 +374,7 @@ Three query examples demonstrating:
---
-### [Notebook 9: Structured Output & Multi-Step Agents](9-structured-output-multi-step.ipynb)
+### [Notebook 10: Structured Output & Multi-Step Agents](10-structured-output-multi-step.ipynb)
**What you'll learn:**
- Constrain agent output to a JSON schema so responses are machine-parseable
@@ -371,7 +394,7 @@ Three query examples demonstrating:
---
-### [Notebook 10: Agent Schedules](10-agent-schedules.ipynb)
+### [Notebook 11: Agent Schedules](11-agent-schedules.ipynb)
**What you'll learn:**
- Automate agent execution with cron-based and interval-based schedules
@@ -391,7 +414,7 @@ A **Research Digest Generator** agent with two schedules:
---
-### [Notebook 11: Calling REST APIs with `web_get`](11-web-get-tool.ipynb)
+### [Notebook 12: Calling REST APIs with `web_get`](12-web-get-tool.ipynb)
**What you'll learn:**
- Configure an agent with the inline `web_get` tool — a general-purpose HTTP client supporting `GET`/`POST`/`PUT`/`DELETE`/`HEAD`, custom headers, and request bodies
@@ -416,7 +439,7 @@ The notebook iterates the agent through several configurations — a single gene
---
-### [Notebook 12: Agent Skills — Progressive-Disclosure Instructions](12-agent-skills.ipynb)
+### [Notebook 13: Agent Skills — Progressive-Disclosure Instructions](13-agent-skills.ipynb)
**What you'll learn:**
- Configure an agent with a `skills` map — each skill is just `{description, content}` (caps: 500 / 50,000 chars)
@@ -436,7 +459,7 @@ A **Support Copilot** agent whose system prompt stays small. A `customer_escalat
---
-### [Notebook 13: Agent Steps — Deterministic Plan Execution](13-agent-steps.ipynb)
+### [Notebook 14: Agent Steps — Deterministic Plan Execution](14-agent-steps.ipynb)
**What you'll learn:**
- Build a sequential **plan-then-execute pipeline** where each phase has its own focused system prompt, tools, and structured-output schema
@@ -452,7 +475,7 @@ A **Contract Triage** agent that processes inbound documents through three seque
- **Steps vs. sub-agents**: steps share session history (each later phase can read what earlier phases produced); sub-agents start fresh in their own context. Steps are the right tool when phases need to *build on* each other.
- **Conditional transitions**: `next_steps` entries with UserFn `condition` expressions (`get('$.output.doc_type') == 'other'`) route on **typed structured-output fields**, not on the LLM's free-form text — the deterministic part of "deterministic plan execution".
- **`reentry_step`**: where the *next* user message in the same session lands. Use it to separate *one-shot pipeline runs* from *ongoing Q&A about what the pipeline produced*.
-- **How this differs from notebook 9**: notebook 9 covers the **classifier-router fan-out** (one classifier branches to one of N terminal handlers). This notebook covers **sequential pipelines**, **conditional gating**, and **`reentry_step`** — read both for the full step-orchestration picture.
+- **How this differs from notebook 10**: notebook 10 covers the **classifier-router fan-out** (one classifier branches to one of N terminal handlers). This notebook covers **sequential pipelines**, **conditional gating**, and **`reentry_step`** — read both for the full step-orchestration picture.
- **Self-contained notebook**: requires only `VECTARA_API_KEY` (no corpora from earlier notebooks).
---
@@ -468,47 +491,51 @@ A **Contract Triage** agent that processes inbound documents through three seque
↓
Upload PDFs + Index crawled documentation
-3. Query API
+3. Deleting Documents
+ ↓
+ Remove documents by ID or metadata filter; reset a corpus
+
+4. Query API
↓
Search, filter, rerank, and generate answers
-4. Agent API
+5. Agent API
↓
Build autonomous agents with tools and context
-5. Sub-Agents
+6. Sub-Agents
↓
Create multi-agent workflows with specialized sub-agents
-6. Artifacts
+7. Artifacts
↓
Work with files in agent sessions
-7. Lambda Tools for Data Analysis
+8. Lambda Tools for Data Analysis
↓
Build NumPy/Pandas-powered data analysis tools
-8. Reranker Instructions
+9. Reranker Instructions
↓
Guide relevance scoring with domain-specific instructions
-9. Structured Output & Multi-Step Agents
- ↓
- Constrain agent output to JSON schemas; route queries through multi-step flows
+10. Structured Output & Multi-Step Agents
+ ↓
+ Constrain agent output to JSON schemas; route queries through multi-step flows
-10. Agent Schedules
+11. Agent Schedules
↓
Automate agent runs on cron or interval schedules
-11. Calling REST APIs with web_get
+12. Calling REST APIs with web_get
↓
Give an agent the inline web_get tool to call any REST API (public or authenticated, read or write) at conversation time
-12. Agent Skills
+13. Agent Skills
↓
Attach progressive-disclosure instructions (description + content) so specialist guidance only enters context when the agent invokes it
-13. Agent Steps — Deterministic Plan Execution
+14. Agent Steps — Deterministic Plan Execution
↓
Drive an agent through a fixed sequence of phases (classify → extract → flag_issues) with conditional gates and reentry_step for follow-up Q&A
```
@@ -536,38 +563,42 @@ jupyter notebook
## Important Notes
-1. **Run notebooks in order** - Each notebook builds on the previous one, though notebooks 8, 9, and 10 only require the corpora from 1-2 and can be run independently of 3-7. Notebooks 11, 12, and 13 are fully self-contained and only need a `VECTARA_API_KEY`.
+1. **Run notebooks in order** - Each notebook generally builds on the previous one, though notebooks 9, 10, and 11 only require the corpora from 1-2 and can be run independently of 3-8. Notebooks 3, 12, 13, and 14 are fully self-contained and only need a `VECTARA_API_KEY`.
2. **Corpus keys** - Save the corpus keys from Notebook 1, you'll need them in subsequent notebooks
-3. **Agent reuse** - Notebooks 4 and 5 check if agents already exist before creating duplicates
+3. **Agent reuse** - Notebooks 5 and 6 check if agents already exist before creating duplicates
4. **Rate limiting** - The notebooks include small delays between API calls to be respectful
5. **Cleanup** - Consider deleting test corpora/agents when done to keep your account organized
-6. **Sub-agent dependencies** - Notebook 5 creates sub-agents first, then a parent orchestrator that references them
+6. **Sub-agent dependencies** - Notebook 6 creates sub-agents first, then a parent orchestrator that references them
## Key API Endpoints Used
| Endpoint | Purpose | Notebook |
|----------|---------|----------|
-| `POST /v2/corpora` | Create corpus | 1 |
+| `POST /v2/corpora` | Create corpus | 1, 3 |
| `GET /v2/corpora` | List corpora | 1 |
| `POST /v2/corpora/{key}/upload_file` | Upload files | 2 |
-| `POST /v2/corpora/{key}/documents` | Index documents | 2 |
-| `GET /v2/corpora/{key}/documents` | List documents | 2 |
-| `POST /v2/query` | Query corpora | 3, 8 |
-| `POST /v2/agents` | Create agent | 4, 5, 6, 7, 9, 10, 11, 12, 13 |
-| `POST /v2/agents/{key}/sessions` | Create session | 4, 5, 6, 7, 9, 11, 12, 13 |
-| `POST /v2/agents/{key}/sessions/{key}/events` | Send messages / Upload artifacts | 4, 5, 6, 7, 9, 11, 12, 13 |
-| `GET /v2/agents/{key}/sessions/{key}/events` | Get conversation history | 4, 10 |
-| `GET /v2/agents/{key}/sessions/{key}/artifacts` | List session artifacts | 6 |
-| `GET /v2/agents` | List agents | 5, 9, 10 |
-| `DELETE /v2/agents/{key}` | Delete agent | 5, 6, 7, 9, 10, 11, 12, 13 |
-| `POST /v2/tools` | Create Lambda tool | 5, 7 |
-| `GET /v2/tools` | List Lambda tools | 5, 7 |
-| `DELETE /v2/tools/{id}` | Delete Lambda tool | 5, 7 |
-| `POST /v2/agents/{key}/schedules` | Create schedule | 10 |
-| `GET /v2/agents/{key}/schedules` | List schedules | 10 |
-| `PATCH /v2/agents/{key}/schedules/{key}` | Update schedule | 10 |
-| `DELETE /v2/agents/{key}/schedules/{key}` | Delete schedule | 10 |
-| `GET /v2/agents/{key}/schedules/{key}/executions` | Execution history | 10 |
+| `POST /v2/corpora/{key}/documents` | Index documents | 2, 3 |
+| `GET /v2/corpora/{key}/documents` | List documents | 2, 3 |
+| `DELETE /v2/corpora/{key}/documents/{id}` | Delete one document | 3 |
+| `DELETE /v2/corpora/{key}/documents` | Bulk delete documents (metadata filter / IDs) | 3 |
+| `POST /v2/corpora/{key}/reset` | Delete all documents (reset corpus) | 3 |
+| `DELETE /v2/corpora/{key}` | Delete corpus | 3 |
+| `POST /v2/query` | Query corpora | 4, 9 |
+| `POST /v2/agents` | Create agent | 5, 6, 7, 8, 10, 11, 12, 13, 14 |
+| `POST /v2/agents/{key}/sessions` | Create session | 5, 6, 7, 8, 10, 12, 13, 14 |
+| `POST /v2/agents/{key}/sessions/{key}/events` | Send messages / Upload artifacts | 5, 6, 7, 8, 10, 12, 13, 14 |
+| `GET /v2/agents/{key}/sessions/{key}/events` | Get conversation history | 5, 11 |
+| `GET /v2/agents/{key}/sessions/{key}/artifacts` | List session artifacts | 7 |
+| `GET /v2/agents` | List agents | 6, 10, 11 |
+| `DELETE /v2/agents/{key}` | Delete agent | 6, 7, 8, 10, 11, 12, 13, 14 |
+| `POST /v2/tools` | Create Lambda tool | 6, 8 |
+| `GET /v2/tools` | List Lambda tools | 6, 8 |
+| `DELETE /v2/tools/{id}` | Delete Lambda tool | 6, 8 |
+| `POST /v2/agents/{key}/schedules` | Create schedule | 11 |
+| `GET /v2/agents/{key}/schedules` | List schedules | 11 |
+| `PATCH /v2/agents/{key}/schedules/{key}` | Update schedule | 11 |
+| `DELETE /v2/agents/{key}/schedules/{key}` | Delete schedule | 11 |
+| `GET /v2/agents/{key}/schedules/{key}/executions` | Execution history | 11 |
## Additional Resources