From 419349d02b683062f3120c79011c8993e4108716 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 18:41:33 +0800
Subject: [PATCH 1/5] feat(toc): implement bounded concurrency for LLM
operations
- Replace join_all with stream::buffered(5) for page assignment, which
  bounds concurrency while preserving input order for the positional
  write-back into entries
- Add bounded concurrency to TOC verification with buffer_unordered(5)
- Implement bounded concurrency for index repair with buffer_unordered(5)
- Use stream processing instead of awaiting all futures at once
- Prevent rate limiting by capping the number of concurrent LLM requests
This change improves performance and reliability by preventing
excessive concurrent API calls to LLM services.
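For reference, a minimal sketch of the bounded-concurrency pattern
(assuming the futures and tokio crates; call_llm is a hypothetical
stand-in for a real request):

```rust
use futures::stream::{self, StreamExt};

// Hypothetical stand-in for a real LLM request.
async fn call_llm(i: usize) -> String {
    format!("response {i}")
}

#[tokio::main]
async fn main() {
    let requests = (0..20).map(call_llm);
    // At most 5 requests are in flight at once; buffer_unordered yields
    // results in completion order as slots free up.
    let responses: Vec<String> = stream::iter(requests)
        .buffer_unordered(5)
        .collect()
        .await;
    println!("{} responses", responses.len());
}
```

buffer_unordered trades result order for throughput; where results are
written back positionally (the page assigner), the order-preserving
buffered variant is used instead.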
fix(structure-extractor): optimize hierarchical structure extraction
- Process first page group as initial structure, then remaining groups
in parallel with bounded concurrency
- Add static version of continuation generation for parallel use
- Improve error handling for failed continuation groups
- Add proper entry deduplication and sorting logic
- Maintain shared context from initial entries for all continuations
The extraction now follows a phased approach: initial structure
generation followed by parallel continuation processing, which
improves both accuracy and performance.
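A minimal sketch of the phased flow, with hypothetical stand-ins for the
real page-group and entry types:

```rust
use futures::stream::{self, StreamExt};

// Hypothetical stand-ins for the real page-group and TOC-entry types.
async fn extract_initial(group: &str) -> Vec<String> {
    vec![format!("{group}/heading")]
}

async fn extract_continuation(group: String, context: Vec<String>) -> Vec<String> {
    vec![format!("{group}/heading (given {} context entries)", context.len())]
}

#[tokio::main]
async fn main() {
    let groups: Vec<String> = (0..4).map(|i| format!("group{i}")).collect();

    // Phase 1: the first group alone seeds the shared context.
    let initial = extract_initial(&groups[0]).await;

    // Phase 2: remaining groups run with bounded concurrency; each future
    // owns its own clone of the initial entries as context.
    let futures: Vec<_> = groups[1..]
        .iter()
        .cloned()
        .map(|g| extract_continuation(g, initial.clone()))
        .collect();
    let rest: Vec<Vec<String>> = stream::iter(futures)
        .buffer_unordered(5)
        .collect()
        .await;

    // Phase 3: merge, sort, and deduplicate.
    let mut entries = initial;
    entries.extend(rest.into_iter().flatten());
    entries.sort();
    entries.dedup();
    println!("{entries:?}");
}
```

Cloning the context per future keeps the continuations independent, so a
failed group can be logged and skipped without poisoning the merge.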
---
rust/src/index/parse/toc/assigner.rs | 40 +++--
rust/src/index/parse/toc/processor.rs | 7 +-
rust/src/index/parse/toc/repairer.rs | 9 +-
.../index/parse/toc/structure_extractor.rs | 164 +++++++++++++++---
rust/src/index/parse/toc/verifier.rs | 56 +++---
5 files changed, 203 insertions(+), 73 deletions(-)
diff --git a/rust/src/index/parse/toc/assigner.rs b/rust/src/index/parse/toc/assigner.rs
index beff3021..52d50403 100644
--- a/rust/src/index/parse/toc/assigner.rs
+++ b/rust/src/index/parse/toc/assigner.rs
@@ -4,7 +4,7 @@
//! Page assigner - assigns physical page numbers to TOC entries.
use std::collections::HashMap;
-use futures::future::join_all;
+use futures::stream::{self, StreamExt};
use tracing::{debug, info};
use crate::config::LlmConfig;
@@ -175,7 +175,10 @@ impl PageAssigner {
})
.collect();
- let verified_offsets = join_all(futures).await;
+ let verified_offsets: Vec<_> = stream::iter(futures)
+ .buffer_unordered(5)
+ .collect()
+ .await;
// Calculate the mode (most common offset)
let successful: Vec<_> = verified_offsets
@@ -265,29 +268,32 @@ Reply in JSON format:
Ok(result.page)
}
- /// Assign pages using LLM for each entry (concurrently).
+ /// Assign pages using LLM for each entry (with bounded concurrency).
async fn assign_with_llm(&self, entries: &mut [TocEntry], pages: &[PdfPage]) -> Result<()> {
info!("Assigning pages using LLM positioning");
let client = self.client.clone();
let pages_owned = pages.to_vec();
+ let total = entries.len();
- // Launch all entry searches concurrently
- let futures: Vec<_> = entries
- .iter()
- .map(|entry| {
- let title = entry.title.clone();
- let client = client.clone();
- let pages = pages_owned.clone();
+ // Launch entry searches with bounded concurrency to avoid rate limiting
+ let futures: Vec<_> = entries.iter().map(|entry| {
+ let title = entry.title.clone();
+ let client = client.clone();
+ let pages = pages_owned.clone();
- async move {
- let groups = Self::group_pages_owned(&pages, 5);
- Self::locate_title_in_groups_static(&client, &title, &groups).await
- }
- })
- .collect();
+ async move {
+ let groups = Self::group_pages_owned(&pages, 5);
+ Self::locate_title_in_groups_static(&client, &title, &groups).await
+ }
+ }).collect();
+
+ let results: Vec<_> = stream::iter(futures)
+        .buffered(5) // preserves input order for the positional zip below
+ .collect()
+ .await;
- let results = join_all(futures).await;
+ info!("Assigned pages for {}/{} entries", results.len(), total);
// Write results back
for (entry, result) in entries.iter_mut().zip(results.into_iter()) {
diff --git a/rust/src/index/parse/toc/processor.rs b/rust/src/index/parse/toc/processor.rs
index 9ed2c95b..8e5f59b0 100644
--- a/rust/src/index/parse/toc/processor.rs
+++ b/rust/src/index/parse/toc/processor.rs
@@ -7,7 +7,7 @@
//! degradation: if one mode fails verification, it falls back to a lower-quality
//! but more reliable mode.
-use futures::future::join_all;
+use futures::stream::{self, StreamExt};
use tracing::{debug, info, warn};
use crate::error::Result;
@@ -505,7 +505,10 @@ impl TocProcessor {
})
.collect();
- let extraction_results = join_all(oversized_futures).await;
+ let extraction_results: Vec<_> = stream::iter(oversized_futures)
+ .buffer_unordered(3)
+ .collect()
+ .await;
// Build a lookup from index → refined sub-entries
let mut refined_map = std::collections::HashMap::new();
diff --git a/rust/src/index/parse/toc/repairer.rs b/rust/src/index/parse/toc/repairer.rs
index 51931674..13c19877 100644
--- a/rust/src/index/parse/toc/repairer.rs
+++ b/rust/src/index/parse/toc/repairer.rs
@@ -3,7 +3,7 @@
//! Index repairer - fixes incorrect TOC entry page assignments.
-use futures::future::join_all;
+use futures::stream::{self, StreamExt};
use tracing::{debug, info};
use crate::config::LlmConfig;
@@ -63,7 +63,7 @@ impl IndexRepairer {
Self::new(RepairerConfig::default())
}
- /// Repair incorrect entries concurrently.
+ /// Repair incorrect entries with bounded concurrency.
pub async fn repair(
&self,
entries: &mut [TocEntry],
@@ -107,7 +107,10 @@ impl IndexRepairer {
})
.collect();
- let results = join_all(tasks).await;
+ let results: Vec<_> = stream::iter(tasks)
+ .buffer_unordered(5)
+ .collect()
+ .await;
// Apply repairs
let mut repaired_count = 0;
diff --git a/rust/src/index/parse/toc/structure_extractor.rs b/rust/src/index/parse/toc/structure_extractor.rs
index 17511b36..be2486d9 100644
--- a/rust/src/index/parse/toc/structure_extractor.rs
+++ b/rust/src/index/parse/toc/structure_extractor.rs
@@ -7,6 +7,7 @@
//! module uses LLM to analyse page content and extract the document's
//! hierarchical structure directly.
+use futures::stream::{self, StreamExt};
use tracing::{debug, info, warn};
use crate::config::LlmConfig;
@@ -40,6 +41,7 @@ impl Default for StructureExtractorConfig {
}
/// A group of consecutive pages with their combined text.
+#[derive(Clone)]
struct PageGroup {
/// Combined text with page markers: `\n...\n`.
text: String,
@@ -77,42 +79,102 @@ impl StructureExtractor {
}
/// Extract hierarchical structure from all pages.
+ ///
+ /// The first page group is processed alone (initial structure), then all
+ /// remaining groups are processed in parallel, each using the initial
+ /// entries as context. Results are merged and deduplicated.
     pub async fn extract(&self, pages: &[PdfPage]) -> Result<Vec<TocEntry>> {
if pages.is_empty() {
return Ok(Vec::new());
}
let groups = self.group_pages(pages);
+ let page_count = pages.len();
info!(
"Extracting structure from {} pages in {} groups",
- pages.len(),
+ page_count,
groups.len()
);
- let mut all_entries = Vec::new();
- let page_count = pages.len();
+ // Phase 1: Generate initial structure from first group
+ let initial_entries = self.generate_initial(&groups[0]).await?;
+ debug!(
+ "Initial group (pages {}-{}): extracted {} entries",
+ groups[0].start_page,
+ groups[0].end_page,
+ initial_entries.len()
+ );
- for (i, group) in groups.iter().enumerate() {
- let group_entries = if i == 0 {
- self.generate_initial(group).await?
- } else {
- self.generate_continuation(group, &all_entries).await?
- };
+ if groups.len() == 1 {
+ return Ok(Self::finalize_entries(initial_entries, page_count));
+ }
- debug!(
- "Group {}/{} (pages {}-{}): extracted {} entries",
- i + 1,
- groups.len(),
- group.start_page,
- group.end_page,
- group_entries.len()
- );
+ // Phase 2: Process remaining groups in parallel (bounded concurrency)
+ // Each continuation group uses the initial entries as shared context.
+ let client = self.client.clone();
+ let initial_entries_ref = &initial_entries;
- all_entries.extend(group_entries);
+ let continuation_futures: Vec<_> = groups[1..]
+ .iter()
+ .map(|group| {
+ let group = group.clone();
+ let client = client.clone();
+ let initial = initial_entries_ref.to_vec();
+
+ async move {
+ let result = Self::generate_continuation_with_client(
+ &client, &group, &initial,
+ )
+ .await;
+ (group.start_page, group.end_page, result)
+ }
+ })
+ .collect();
+
+ let continuation_results: Vec<_> = stream::iter(continuation_futures)
+ .buffer_unordered(5)
+ .collect()
+ .await;
+
+ // Phase 3: Merge initial + continuation entries
+ let mut all_entries = initial_entries;
+ for (start, end, result) in continuation_results {
+ match result {
+ Ok(entries) => {
+ debug!(
+ "Continuation group (pages {}-{}): extracted {} entries",
+ start,
+ end,
+ entries.len()
+ );
+ all_entries.extend(entries);
+ }
+ Err(e) => {
+ warn!(
+ "Continuation group (pages {}-{}) failed: {}",
+ start, end, e
+ );
+ }
+ }
}
- // Truncate physical_page values that exceed document length
- for entry in &mut all_entries {
+ // Phase 4: Sort by page number, deduplicate, truncate
+ all_entries.sort_by(|a, b| {
+ a.physical_page
+ .unwrap_or(0)
+ .cmp(&b.physical_page.unwrap_or(0))
+ });
+ all_entries.dedup_by(|a, b| {
+ a.title.trim() == b.title.trim()
+ && a.physical_page == b.physical_page
+ });
+
+ Ok(Self::finalize_entries(all_entries, page_count))
+ }
+
+ /// Truncate out-of-range page numbers and log stats.
+    fn finalize_entries(mut entries: Vec<TocEntry>, page_count: usize) -> Vec<TocEntry> {
+ for entry in &mut entries {
if let Some(p) = entry.physical_page {
if p > page_count {
warn!(
@@ -123,9 +185,8 @@ impl StructureExtractor {
}
}
}
-
- info!("Structure extraction complete: {} entries", all_entries.len());
- Ok(all_entries)
+ info!("Structure extraction complete: {} entries", entries.len());
+ entries
}
/// Group pages by estimated token count.
@@ -267,6 +328,63 @@ If no new structural elements are found, return: []"#,
})
.collect())
}
+
+ /// Static version of continuation generation for parallel use.
+ ///
+ /// Uses an owned `LlmClient` reference instead of `&self`.
+ async fn generate_continuation_with_client(
+ client: &LlmClient,
+ group: &PageGroup,
+ previous: &[TocEntry],
+    ) -> Result<Vec<TocEntry>> {
+ let system = STRUCTURE_EXTRACTION_SYSTEM_PROMPT;
+
+ let prev_summary = previous
+ .iter()
+ .rev()
+ .take(10)
+ .rev()
+ .map(|e| {
+ format!(
+ " {{\"title\": \"{}\", \"level\": {}, \"physical_page\": {}}}",
+ e.title,
+ e.level,
+ e.physical_page.unwrap_or(0)
+ )
+ })
+        .collect::<Vec<_>>()
+ .join(",\n");
+
+ let user = format!(
+ r#"Previously extracted structure:
+[
+{}
+]
+
+Continue extracting structure from these pages:
+{}
+
+Return ONLY the NEW entries (do not repeat previous ones):
+[
+ {{"title": "...", "level": N, "physical_page": M}},
+ ...
+]
+
+If no new structural elements are found, return: []"#,
+ prev_summary, group.text
+ );
+
+ let sections: Vec = client.complete_json(system, &user).await?;
+
+ Ok(sections
+ .into_iter()
+ .map(|s| {
+ TocEntry::new(s.title, s.level)
+ .with_physical_page(s.physical_page)
+ .with_confidence(0.7)
+ })
+ .collect())
+ }
}
/// Format pages into tagged text for LLM consumption.
diff --git a/rust/src/index/parse/toc/verifier.rs b/rust/src/index/parse/toc/verifier.rs
index 09b28059..fd944386 100644
--- a/rust/src/index/parse/toc/verifier.rs
+++ b/rust/src/index/parse/toc/verifier.rs
@@ -3,7 +3,7 @@
//! Index verifier - verifies TOC entry page assignments.
-use futures::future::join_all;
+use futures::stream::{self, StreamExt};
use rand::seq::SliceRandom;
use tracing::{debug, info};
@@ -65,7 +65,7 @@ impl IndexVerifier {
/// Verify TOC entries against PDF pages.
///
- /// All sample entries are verified concurrently via LLM calls.
+ /// Sample entries are verified via LLM calls with bounded concurrency.
pub async fn verify(
&self,
entries: &[TocEntry],
@@ -77,36 +77,36 @@ impl IndexVerifier {
let sample = self.select_sample(entries);
- // Launch all verification checks concurrently
+ // Launch verification checks with bounded concurrency
let client = self.client.clone();
- let futures: Vec<_> = sample
- .iter()
- .map(|(index, entry)| {
- let index = *index;
- let title = entry.title.clone();
- let physical_page = entry.physical_page;
- let client = client.clone();
- let pages = pages.to_vec();
-
- async move {
- match physical_page {
- Some(page) => {
- let result =
- Self::verify_entry_with_client(&client, &title, page, &pages).await;
- (index, title, page, result)
- }
- None => (
- index,
- title,
- 0,
- Ok(Err(ErrorType::PageOutOfRange)),
- ),
+ let futures: Vec<_> = sample.iter().map(|(index, entry)| {
+ let index = *index;
+ let title = entry.title.clone();
+ let physical_page = entry.physical_page;
+ let client = client.clone();
+ let pages = pages.to_vec();
+
+ async move {
+ match physical_page {
+ Some(page) => {
+ let result =
+ Self::verify_entry_with_client(&client, &title, page, &pages).await;
+ (index, title, page, result)
}
+ None => (
+ index,
+ title,
+ 0,
+ Ok(Err(ErrorType::PageOutOfRange)),
+ ),
}
- })
- .collect();
+ }
+ }).collect();
- let results = join_all(futures).await;
+ let results: Vec<_> = stream::iter(futures)
+ .buffer_unordered(5)
+ .collect()
+ .await;
// Aggregate results
let total = results.len();
From b9c68da62def2cba52bb4697fc9f6313b156fe29 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 19:18:31 +0800
Subject: [PATCH 2/5] feat(python): add IndexMetrics binding with detailed
indexing statistics
Add Python binding for IndexMetrics to expose comprehensive indexing
pipeline metrics including timing information, LLM usage statistics,
and processing counts.
The new PyIndexMetrics class provides access to:
- Total indexing time and individual stage durations
- Node processing and summary generation counts
- LLM call statistics and token usage
- Topic and keyword indexing metrics
- Summary failure tracking
Also expose metrics through the PyIndexItem interface and register
the new class with the module.
feat(rust): track and expose indexing failure metrics
Enhance the IndexMetrics system to track and report on failed
summary generations during the indexing process. Add new
summaries_failed field and add_summaries_failed method to record
failures from LLM errors, rate limits, or other processing issues.
Update example code to display failure statistics and improve
error handling for missing LLM configuration.
refactor(rust): make metrics module public and update exports
Make the metrics module public to allow external access to metric
types and functionality. Also re-export FailedItem and QueryResultItem
from the client API.
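The binding follows the standard PyO3 newtype-wrapper pattern: a
`#[pyclass]` struct holds the Rust value, `#[getter]` methods expose its
fields read-only, and the class is registered with the module. A minimal
sketch under those assumptions (pyo3 0.21+ with the Bound API; the names
here are illustrative, not the crate's real types):

```rust
use pyo3::prelude::*;

// Plain Rust struct, as produced by the indexing pipeline.
#[derive(Clone, Default)]
pub struct Stats {
    pub failed: usize,
}

// Python-visible newtype wrapper around the Rust struct.
#[pyclass(name = "Stats")]
pub struct PyStats {
    inner: Stats,
}

#[pymethods]
impl PyStats {
    /// Read-only field access from Python: `stats.failed`.
    #[getter]
    fn failed(&self) -> usize {
        self.inner.failed
    }

    fn __repr__(&self) -> String {
        format!("Stats(failed={})", self.inner.failed)
    }
}

// Register the wrapper class with the extension module.
#[pymodule]
fn demo(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PyStats>()
}
```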
---
python/src/lib.rs | 97 ++++++++++++++++++++++++++++++++
rust/examples/index_pdf.rs | 17 +++++-
rust/src/index/stages/enhance.rs | 3 +
rust/src/lib.rs | 7 ++-
rust/src/metrics/index.rs | 9 +++
5 files changed, 127 insertions(+), 6 deletions(-)
diff --git a/python/src/lib.rs b/python/src/lib.rs
index a8b32439..a5da45bb 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -14,6 +14,7 @@ use ::vectorless::client::{
IndexMode, IndexOptions, IndexResult, QueryContext, QueryResult, QueryResultItem,
};
use ::vectorless::error::Error as RustError;
+use ::vectorless::metrics::IndexMetrics;
// ============================================================
// Error Types
@@ -477,6 +478,95 @@ impl PyQueryResult {
}
}
+// ============================================================
+// IndexMetrics
+// ============================================================
+
+/// Indexing pipeline metrics.
+#[pyclass(name = "IndexMetrics")]
+pub struct PyIndexMetrics {
+ inner: IndexMetrics,
+}
+
+#[pymethods]
+impl PyIndexMetrics {
+ /// Total indexing time (ms).
+ #[getter]
+ fn total_time_ms(&self) -> u64 {
+ self.inner.total_time_ms()
+ }
+
+ /// Parse stage duration (ms).
+ #[getter]
+ fn parse_time_ms(&self) -> u64 {
+ self.inner.parse_time_ms
+ }
+
+ /// Build stage duration (ms).
+ #[getter]
+ fn build_time_ms(&self) -> u64 {
+ self.inner.build_time_ms
+ }
+
+ /// Enhance (summary) stage duration (ms).
+ #[getter]
+ fn enhance_time_ms(&self) -> u64 {
+ self.inner.enhance_time_ms
+ }
+
+ /// Number of nodes processed.
+ #[getter]
+ fn nodes_processed(&self) -> usize {
+ self.inner.nodes_processed
+ }
+
+ /// Number of summaries successfully generated.
+ #[getter]
+ fn summaries_generated(&self) -> usize {
+ self.inner.summaries_generated
+ }
+
+ /// Number of summaries that failed to generate.
+ #[getter]
+ fn summaries_failed(&self) -> usize {
+ self.inner.summaries_failed
+ }
+
+ /// Number of LLM calls made.
+ #[getter]
+ fn llm_calls(&self) -> usize {
+ self.inner.llm_calls
+ }
+
+ /// Total tokens generated by LLM.
+ #[getter]
+ fn total_tokens_generated(&self) -> usize {
+ self.inner.total_tokens_generated
+ }
+
+ /// Number of topics in reasoning index.
+ #[getter]
+ fn topics_indexed(&self) -> usize {
+ self.inner.topics_indexed
+ }
+
+ /// Number of keywords in reasoning index.
+ #[getter]
+ fn keywords_indexed(&self) -> usize {
+ self.inner.keywords_indexed
+ }
+
+ fn __repr__(&self) -> String {
+ format!(
+ "IndexMetrics(total={}ms, summaries={}, failed={}, llm_calls={})",
+ self.inner.total_time_ms(),
+ self.inner.summaries_generated,
+ self.inner.summaries_failed,
+ self.inner.llm_calls,
+ )
+ }
+}
+
// ============================================================
// IndexItem / IndexResult
// ============================================================
@@ -514,6 +604,12 @@ impl PyIndexItem {
self.inner.page_count
}
+ /// Indexing pipeline metrics (timing, LLM usage, etc.).
+ #[getter]
    fn metrics(&self) -> Option<PyIndexMetrics> {
+ self.inner.metrics.as_ref().map(|m| PyIndexMetrics { inner: m.clone() })
+ }
+
fn __repr__(&self) -> String {
format!(
"IndexItem(doc_id='{}', name='{}')",
@@ -1076,6 +1172,7 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::()?;
m.add_class::()?;
m.add_class::()?;
+    m.add_class::<PyIndexMetrics>()?;
m.add_class::()?;
m.add_class::()?;
m.add_class::()?;
diff --git a/rust/examples/index_pdf.rs b/rust/examples/index_pdf.rs
index c7840e14..b370b39d 100644
--- a/rust/examples/index_pdf.rs
+++ b/rust/examples/index_pdf.rs
@@ -36,9 +36,19 @@ async fn main() -> vectorless::Result<()> {
println!("=== Indexing PDF: {} ===\n", pdf_path);
- // Build engine with LLM configuration from environment or defaults.
- let api_key = std::env::var("LLM_API_KEY")
- .unwrap_or_else(|_| "sk-or-v1-...".to_string());
+ // LLM configuration is required — set these environment variables:
+ // LLM_API_KEY — your API key (required)
+ // LLM_MODEL — model name (default: google/gemini-3-flash-preview)
+ // LLM_ENDPOINT — API endpoint (default: http://localhost:4000/api/v1)
+ let api_key = match std::env::var("LLM_API_KEY") {
+ Ok(key) => key,
+ Err(_) => {
+ eprintln!("Error: LLM_API_KEY environment variable is required.");
+ eprintln!("Set it before running:");
+ eprintln!(" LLM_API_KEY=sk-xxx cargo run --example index_pdf -- ");
+ std::process::exit(1);
+ }
+ };
let model = std::env::var("LLM_MODEL")
.unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string());
let endpoint = std::env::var("LLM_ENDPOINT")
@@ -83,6 +93,7 @@ async fn main() -> vectorless::Result<()> {
println!(" enhance: {}ms", metrics.enhance_time_ms);
println!(" nodes: {}", metrics.nodes_processed);
println!(" summaries: {}", metrics.summaries_generated);
+ println!(" failed: {}", metrics.summaries_failed);
println!(" llm calls: {}", metrics.llm_calls);
println!(" tokens: {}", metrics.total_tokens_generated);
println!(" topics: {}", metrics.topics_indexed);
diff --git a/rust/src/index/stages/enhance.rs b/rust/src/index/stages/enhance.rs
index 5550de45..a79b5fb3 100644
--- a/rust/src/index/stages/enhance.rs
+++ b/rust/src/index/stages/enhance.rs
@@ -313,6 +313,9 @@ impl IndexStage for EnhanceStage {
let duration = start.elapsed().as_millis() as u64;
ctx.metrics.record_enhance(duration);
+ if failed > 0 {
+ ctx.metrics.add_summaries_failed(failed);
+ }
info!(
"Generated {} summaries ({} shortcut, {} failed, {} skipped no content, {} skipped tokens) in {}ms",
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 92e6a4cc..689d331f 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -54,7 +54,7 @@ pub mod graph;
mod index;
mod llm;
mod memo;
-mod metrics;
+pub mod metrics;
mod retrieval;
mod storage;
mod throttle;
@@ -62,8 +62,9 @@ mod utils;
// Client API
pub use client::{
- BuildError, ClientError, DocumentFormat, DocumentInfo, Engine, EngineBuilder, IndexContext,
- IndexItem, IndexMode, IndexOptions, IndexResult, QueryContext, QueryResult,
+ BuildError, ClientError, DocumentFormat, DocumentInfo, Engine, EngineBuilder, FailedItem,
+ IndexContext, IndexItem, IndexMode, IndexOptions, IndexResult, QueryContext, QueryResult,
+ QueryResultItem,
};
// Error types
diff --git a/rust/src/metrics/index.rs b/rust/src/metrics/index.rs
index 58054661..4432e32f 100644
--- a/rust/src/metrics/index.rs
+++ b/rust/src/metrics/index.rs
@@ -64,6 +64,10 @@ pub struct IndexMetrics {
#[serde(default)]
pub summaries_generated: usize,
+ /// Number of summaries that failed to generate (LLM error, rate limit, etc.).
+ #[serde(default)]
+ pub summaries_failed: usize,
+
/// Number of nodes skipped (thinning).
#[serde(default)]
pub nodes_skipped: usize,
@@ -141,6 +145,11 @@ impl IndexMetrics {
self.summaries_generated += 1;
}
+ /// Add to summaries failed count.
+ pub fn add_summaries_failed(&mut self, count: usize) {
+ self.summaries_failed += count;
+ }
+
/// Increment nodes skipped.
pub fn increment_nodes_skipped(&mut self) {
self.nodes_skipped += 1;
From a0da791e56ca2e14975f4cc2109ee3e5d4bad2ab Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 20:02:44 +0800
Subject: [PATCH 3/5] feat: add comprehensive examples for batch indexing,
document management, error handling, and PDF indexing
Add four new example projects demonstrating core functionality:
- Batch Indexing Example: Shows indexing multiple documents using from_paths,
from_dir, and from_bytes with cross-document querying capabilities
- Document Management Example: Demonstrates CRUD operations including list(),
exists(), remove(), and clear() methods for indexed documents
- Error Handling Example: Illustrates proper VectorlessError exception
handling with different error categories and inspection techniques
- PDF Indexing Example: Showcases PDF file indexing with detailed metrics
inspection and querying capabilities
Each example includes dedicated README.md files with setup instructions,
environment variable documentation, and usage examples. All examples follow
consistent configuration patterns with proper async handling and cleanup
procedures.
---
examples/batch_indexing/README.md | 28 ++++
examples/batch_indexing/main.py | 183 +++++++++++++++++++++++++
examples/document_management/README.md | 28 ++++
examples/document_management/main.py | 135 ++++++++++++++++++
examples/error_handling/README.md | 33 +++++
examples/error_handling/main.py | 111 +++++++++++++++
examples/pdf_indexing/README.md | 27 ++++
examples/pdf_indexing/main.py | 126 +++++++++++++++++
8 files changed, 671 insertions(+)
create mode 100644 examples/batch_indexing/README.md
create mode 100644 examples/batch_indexing/main.py
create mode 100644 examples/document_management/README.md
create mode 100644 examples/document_management/main.py
create mode 100644 examples/error_handling/README.md
create mode 100644 examples/error_handling/main.py
create mode 100644 examples/pdf_indexing/README.md
create mode 100644 examples/pdf_indexing/main.py
diff --git a/examples/batch_indexing/README.md b/examples/batch_indexing/README.md
new file mode 100644
index 00000000..41e87fae
--- /dev/null
+++ b/examples/batch_indexing/README.md
@@ -0,0 +1,28 @@
+# Batch Indexing Example
+
+Demonstrates indexing multiple documents at once using:
+- `from_paths` -- explicit list of file paths
+- `from_dir` -- all supported files in a directory
+- `from_bytes` -- raw in-memory content
+
+Also shows cross-document querying with `with_doc_ids`.
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|------------------------|----------------------|-----------|
+| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
+| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
+| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
diff --git a/examples/batch_indexing/main.py b/examples/batch_indexing/main.py
new file mode 100644
index 00000000..7d6d03cb
--- /dev/null
+++ b/examples/batch_indexing/main.py
@@ -0,0 +1,183 @@
+"""
+Batch indexing example -- demonstrates indexing multiple documents at once
+using from_paths, from_dir, and from_bytes.
+
+Usage:
+ pip install vectorless
+ python main.py
+"""
+
+import asyncio
+import os
+
+from vectorless import (
+ Engine,
+ IndexContext,
+ IndexOptions,
+ QueryContext,
+ VectorlessError,
+)
+
+# --- Configuration ---
+API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)
+WORKSPACE = "./workspace"
+
+# Sample documents for demonstration
+DOCS = {
+ "alpha.md": """\
+# Alpha Report
+
+## Summary
+
+Alpha is a distributed key-value store designed for low-latency reads.
+It uses a log-structured merge tree for storage.
+
+## Architecture
+
+Write requests go through a write-ahead log, then are buffered in memory.
+When the buffer is full, it is flushed to disk as an immutable SSTable.
+""",
+ "beta.md": """\
+# Beta Report
+
+## Summary
+
+Beta is a stream processing engine that consumes events from Kafka topics
+and applies real-time transformations using a DAG-based execution model.
+
+## Performance
+
+Beta processes up to 2 million events per second per node on commodity hardware.
+""",
+ "gamma.md": """\
+# Gamma Report
+
+## Summary
+
+Gamma is a feature store that bridges the gap between offline feature
+computation and online serving. Features are computed in Spark and served
+via a low-latency gRPC endpoint.
+
+## Integration
+
+Gamma integrates with Alpha for feature metadata storage and Beta for
+real-time feature updates.
+""",
+}
+
+
+def write_sample_docs(base_dir: str) -> list[str]:
+ """Write sample markdown files and return their paths."""
+ paths = []
+ for name, content in DOCS.items():
+ path = os.path.join(base_dir, name)
+ with open(path, "w") as f:
+ f.write(content)
+ paths.append(path)
+ return paths
+
+
+async def main() -> None:
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+ # Create a temp directory with sample documents
+ docs_dir = "./batch_docs"
+ os.makedirs(docs_dir, exist_ok=True)
+ paths = write_sample_docs(docs_dir)
+
+ # ---- 1. Index multiple files at once via from_paths ----
+ print("=" * 50)
+ print(" from_paths -- index a list of files")
+ print("=" * 50)
+
+ ctx = IndexContext.from_paths(paths)
+ result = await engine.index(ctx)
+
+ print(f" Indexed {len(result.items)} document(s)")
+ for item in result.items:
+ print(f" - {item.name} ({item.doc_id[:8]}...)")
+ if result.has_failures():
+ for f in result.failed:
+ print(f" ! Failed: {f.source} -- {f.error}")
+ print()
+
+ doc_ids = [item.doc_id for item in result.items]
+
+ # ---- 2. Query across all batch-indexed documents ----
+ print("=" * 50)
+ print(" Query across multiple documents")
+ print("=" * 50)
+
+ answer = await engine.query(
+ QueryContext(
+ "Which system processes the most events per second?"
+ ).with_doc_ids(doc_ids)
+ )
+ for item in answer.items:
+ print(f" [{item.doc_id[:8]}...] score={item.score:.2f}")
+ print(f" {item.content[:200]}...")
+ print()
+
+ # ---- 3. Index a directory via from_dir ----
+ print("=" * 50)
+ print(" from_dir -- index all supported files in a directory")
+ print("=" * 50)
+
+ # Clear first so we see fresh results
+ await engine.clear()
+
+ ctx = IndexContext.from_dir(docs_dir).with_options(
+ IndexOptions(generate_summaries=True, generate_description=True)
+ )
+ result = await engine.index(ctx)
+
+ print(f" Indexed {len(result.items)} document(s)")
+ for item in result.items:
+ desc = item.description[:80] if item.description else "N/A"
+ print(f" - {item.name}: {desc}...")
+ print()
+
+ # ---- 4. Index from raw bytes via from_bytes ----
+ print("=" * 50)
+ print(" from_bytes -- index in-memory content")
+ print("=" * 50)
+
+ md_bytes = b"""# Delta Notes
+
+## Key Points
+
+- Delta uses CRDTs for conflict-free replication.
+- Writes are locally committed then asynchronously propagated.
+- Read repair ensures eventual consistency across all replicas.
+"""
+
+ ctx = IndexContext.from_bytes(md_bytes, "markdown").with_name("delta")
+ result = await engine.index(ctx)
+
+ print(f" Indexed: {result.doc_id}")
+ print()
+
+ # ---- Cleanup ----
+ print("=" * 50)
+ print(" Cleanup")
+ print("=" * 50)
+
+ removed = await engine.clear()
+ print(f" Removed {removed} document(s)")
+
+ # Remove temp files
+ for p in paths:
+ os.remove(p)
+ os.rmdir(docs_dir)
+ print(f" Cleaned up {docs_dir}/")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/examples/document_management/README.md b/examples/document_management/README.md
new file mode 100644
index 00000000..e41148e0
--- /dev/null
+++ b/examples/document_management/README.md
@@ -0,0 +1,28 @@
+# Document Management Example
+
+Demonstrates CRUD operations on indexed documents:
+
+- `engine.list()` -- list all documents
+- `engine.exists(doc_id)` -- check if a document exists
+- `engine.remove(doc_id)` -- remove a single document
+- `engine.clear()` -- remove all documents
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|------------------------|----------------------|-----------|
+| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
+| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
+| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
diff --git a/examples/document_management/main.py b/examples/document_management/main.py
new file mode 100644
index 00000000..f5d72360
--- /dev/null
+++ b/examples/document_management/main.py
@@ -0,0 +1,135 @@
+"""
+Document management example -- demonstrates CRUD operations on indexed documents:
+list, exists, remove, and clear.
+
+Usage:
+ pip install vectorless
+ python main.py
+"""
+
+import asyncio
+import os
+
+from vectorless import (
+ Engine,
+ IndexContext,
+ QueryContext,
+ VectorlessError,
+)
+
+# --- Configuration ---
+API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)
+WORKSPACE = "./workspace"
+
+# Sample documents
+SAMPLE_A = """\
+# Project Alpha
+
+## Overview
+
+Project Alpha is a next-generation database engine written in Rust.
+It supports ACID transactions and serializable isolation.
+
+## Features
+
+- MVCC concurrency control
+- B-tree and LSM storage engines
+- Query planner with cost-based optimization
+"""
+
+SAMPLE_B = """\
+# Project Beta
+
+## Overview
+
+Project Beta is a web framework for building real-time applications.
+It uses WebSocket-based communication and server-side rendering.
+
+## Features
+
+- Hot module reloading
+- Built-in authentication middleware
+- Automatic code splitting
+"""
+
+
+async def main() -> None:
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+ # ---- Index two documents ----
+ print("Indexing two documents...")
+
+ result_a = await engine.index(
+ IndexContext.from_content(SAMPLE_A, "markdown").with_name("alpha")
+ )
+ doc_id_a = result_a.doc_id
+ print(f" A: {doc_id_a}")
+
+ result_b = await engine.index(
+ IndexContext.from_content(SAMPLE_B, "markdown").with_name("beta")
+ )
+ doc_id_b = result_b.doc_id
+ print(f" B: {doc_id_b}")
+ print()
+
+ # ---- list() -- show all indexed documents ----
+ print("--- list() ---")
+ docs = await engine.list()
+ for doc in docs:
+ pages = f", pages={doc.page_count}" if doc.page_count else ""
+ lines = f", lines={doc.line_count}" if doc.line_count else ""
+ print(f" {doc.name} id={doc.id[:8]}... format={doc.format}{pages}{lines}")
+ print(f" Total: {len(docs)} document(s)\n")
+
+ # ---- exists() -- check if a document is indexed ----
+ print("--- exists() ---")
+ for did, label in [(doc_id_a, "A"), (doc_id_b, "B"), ("nonexistent-id", "?")]:
+ found = await engine.exists(did)
+ print(f" {label}: exists={found}")
+ print()
+
+ # ---- Query a specific document ----
+ print("--- query(doc_id_a) ---")
+ answer = await engine.query(
+ QueryContext("What storage engines does Alpha support?").with_doc_id(doc_id_a)
+ )
+ item = answer.single()
+ if item:
+ print(f" Score: {item.score:.2f}")
+ print(f" Answer: {item.content[:200]}...\n")
+
+ # ---- remove() -- delete a single document ----
+ print("--- remove(doc_id_a) ---")
+ removed = await engine.remove(doc_id_a)
+ print(f" Removed A: {removed}")
+
+ # Verify it's gone
+ exists_a = await engine.exists(doc_id_a)
+ print(f" exists(A) after removal: {exists_a}")
+ print()
+
+ # ---- list() again -- only B should remain ----
+ print("--- list() after removal ---")
+ docs = await engine.list()
+ for doc in docs:
+ print(f" {doc.name} id={doc.id[:8]}...")
+ print(f" Total: {len(docs)} document(s)\n")
+
+ # ---- clear() -- remove all remaining documents ----
+ print("--- clear() ---")
+ cleared = await engine.clear()
+ print(f" Cleared {cleared} document(s)")
+
+ docs = await engine.list()
+ print(f" Remaining: {len(docs)} document(s)")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/examples/error_handling/README.md b/examples/error_handling/README.md
new file mode 100644
index 00000000..2424d618
--- /dev/null
+++ b/examples/error_handling/README.md
@@ -0,0 +1,33 @@
+# Error Handling Example
+
+Demonstrates how to catch and inspect `VectorlessError` exceptions:
+
+- Invalid format strings
+- Invalid indexing modes
+- Querying non-existent documents
+- Batch indexing with partial failures
+- Engine creation with invalid credentials
+
+The `VectorlessError` exception provides:
+- `kind` -- error category (`"config"`, `"not_found"`, `"parse"`, `"llm"`, etc.)
+- `message` -- human-readable error description
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|------------------------|----------------------|-----------|
+| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
+| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
+| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
diff --git a/examples/error_handling/main.py b/examples/error_handling/main.py
new file mode 100644
index 00000000..993814a6
--- /dev/null
+++ b/examples/error_handling/main.py
@@ -0,0 +1,111 @@
+"""
+Error handling example -- demonstrates catching and inspecting VectorlessError.
+
+Usage:
+ pip install vectorless
+ python main.py
+"""
+
+import asyncio
+import os
+
+from vectorless import (
+ Engine,
+ IndexContext,
+ IndexOptions,
+ QueryContext,
+ VectorlessError,
+)
+
+# --- Configuration ---
+API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)
+WORKSPACE = "./workspace"
+
+
+async def main() -> None:
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+ # ---- 1. Invalid format ----
+ print("--- Invalid format in from_bytes ---")
+ try:
+ ctx = IndexContext.from_bytes(b"hello", "xml")
+ except VectorlessError as e:
+ print(f" Caught VectorlessError:")
+ print(f" kind: {e.kind}")
+ print(f" message: {e.message}")
+ print(f" repr: {repr(e)}")
+ print()
+
+ # ---- 2. Invalid indexing mode ----
+ print("--- Invalid indexing mode ---")
+ try:
+ opts = IndexOptions(mode="bad_mode")
+ except VectorlessError as e:
+ print(f" Caught VectorlessError:")
+ print(f" kind: {e.kind}")
+ print(f" message: {e.message}")
+ print()
+
+ # ---- 3. Query a non-existent document ----
+ print("--- Query non-existent document ---")
+ try:
+ await engine.query(
+ QueryContext("What is this?").with_doc_id("does-not-exist")
+ )
+ except VectorlessError as e:
+ print(f" Caught VectorlessError:")
+ print(f" kind: {e.kind}")
+ print(f" message: {e.message}")
+ print()
+
+ # ---- 4. Index with partial failure in batch ----
+ print("--- Batch indexing with mixed results ---")
+ good = IndexContext.from_content("# Real Doc\n\nThis is valid content.", "markdown")
+
+ result = await engine.index(good.with_name("good_doc"))
+ if result.has_failures():
+ for f in result.failed:
+ print(f" Failed: {f.source} -- {f.error}")
+ else:
+ print(f" Success: {result.doc_id}")
+
+ # Inspect individual items
+ for item in result.items:
+ print(f" Item: {item.name} ({item.format})")
+ if item.metrics:
+ m = item.metrics
+ print(f" Total time: {m.total_time_ms} ms, LLM calls: {m.llm_calls}")
+ print()
+
+ # ---- 5. Engine creation with bad credentials ----
+ print("--- Engine with invalid credentials ---")
+ try:
+ bad_engine = Engine(
+ workspace=WORKSPACE + "_bad",
+ api_key="sk-invalid-key-12345",
+ model="gpt-4o",
+ )
+ # Try to use it -- the error will surface on the first LLM call
+ await bad_engine.index(
+ IndexContext.from_content("# Test\n", "markdown").with_name("fail_test")
+ )
+ except VectorlessError as e:
+ print(f" Caught VectorlessError:")
+ print(f" kind: {e.kind}")
+ print(f" message: {e.message[:120]}...")
+ print()
+
+ # ---- Cleanup ----
+ await engine.clear()
+ print("Done.")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/examples/pdf_indexing/README.md b/examples/pdf_indexing/README.md
new file mode 100644
index 00000000..cfee9a95
--- /dev/null
+++ b/examples/pdf_indexing/README.md
@@ -0,0 +1,27 @@
+# PDF Indexing Example
+
+Demonstrates indexing a PDF file, inspecting indexing metrics, and querying.
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Run
+
+```bash
+# Use the sample PDF from the repository
+python main.py
+
+# Or specify your own PDF file
+python main.py /path/to/document.pdf
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|------------------------|----------------------|-----------|
+| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
+| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
+| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
diff --git a/examples/pdf_indexing/main.py b/examples/pdf_indexing/main.py
new file mode 100644
index 00000000..e79b6db5
--- /dev/null
+++ b/examples/pdf_indexing/main.py
@@ -0,0 +1,126 @@
+"""
+PDF indexing example -- demonstrates indexing PDF files and inspecting metrics.
+
+Usage:
+ pip install vectorless
+ python main.py [path/to/file.pdf]
+
+If no path is given, uses the sample PDF in the repository.
+"""
+
+import asyncio
+import os
+import sys
+
+from vectorless import (
+ Engine,
+ IndexContext,
+ IndexItem,
+ IndexMetrics,
+ IndexOptions,
+ QueryContext,
+ VectorlessError,
+)
+
+# --- Configuration ---
+API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)
+WORKSPACE = "./workspace"
+
+# Resolve the sample PDF path relative to the repo root
+SAMPLE_PDF = os.path.join(
+ os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
+ "samples",
+ "Docker_Cheat_Sheet.pdf",
+)
+
+
+def print_separator(title: str) -> None:
+ print(f"\n{'=' * 40}")
+ print(f" {title}")
+ print(f"{'=' * 40}")
+
+
+def print_metrics(item: IndexItem) -> None:
+ """Pretty-print indexing metrics for a single item."""
+ m: IndexMetrics | None = item.metrics
+ if m is None:
+ print(" (no metrics available)")
+ return
+
+ print(f" Total time: {m.total_time_ms:>6} ms")
+ print(f" Parse time: {m.parse_time_ms:>6} ms")
+ print(f" Build time: {m.build_time_ms:>6} ms")
+ print(f" Enhance time: {m.enhance_time_ms:>6} ms")
+ print(f" Nodes processed: {m.nodes_processed:>6}")
+ print(f" Summaries ok: {m.summaries_generated:>6}")
+ print(f" Summaries failed: {m.summaries_failed:>6}")
+ print(f" LLM calls: {m.llm_calls:>6}")
+ print(f" Tokens generated: {m.total_tokens_generated:>6}")
+ print(f" Topics indexed: {m.topics_indexed:>6}")
+ print(f" Keywords indexed: {m.keywords_indexed:>6}")
+
+
+async def main() -> None:
+ pdf_path = sys.argv[1] if len(sys.argv) > 1 else SAMPLE_PDF
+
+ if not os.path.isfile(pdf_path):
+ print(f"Error: file not found: {pdf_path}")
+ sys.exit(1)
+
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+ # ---- Index with description + summaries enabled ----
+ print_separator("Indexing PDF")
+
+ options = IndexOptions(generate_summaries=True, generate_description=True)
+ ctx = IndexContext.from_path(pdf_path).with_options(options)
+
+ try:
+ result = await engine.index(ctx)
+ except VectorlessError as e:
+ print(f"Indexing failed: [{e.kind}] {e.message}")
+ return
+
+ if result.has_failures():
+ for f in result.failed:
+ print(f" Failed: {f.source} -- {f.error}")
+ return
+
+ doc_id = result.doc_id
+ print(f" doc_id: {doc_id}")
+
+ for item in result.items:
+ print(f"\n Item: {item.name} ({item.format})")
+ if item.page_count is not None:
+ print(f" Pages: {item.page_count}")
+ if item.description:
+ print(f" Description: {item.description[:120]}...")
+ print_metrics(item)
+
+ # ---- Query the PDF ----
+ print_separator("Query")
+
+ answer = await engine.query(
+ QueryContext("What is this document about?").with_doc_id(doc_id)
+ )
+ item = answer.single()
+ if item:
+ print(f" Score: {item.score:.2f}")
+ print(f" Nodes: {item.node_ids}")
+ print(f" Content: {item.content[:300]}...")
+
+ # ---- Cleanup ----
+ print_separator("Cleanup")
+ removed = await engine.clear()
+ print(f" Removed {removed} document(s)")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
From d3d1c61463ee2c318df8df5f3638b108e37bc081 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 20:05:03 +0800
Subject: [PATCH 4/5] docs(examples): add index metrics example with detailed
pipeline analysis
Add a new example demonstrating how to use IndexMetrics to inspect
detailed indexing pipeline metrics including timing breakdowns,
LLM usage statistics, and reasoning index performance.
The example includes:
- README with setup instructions and environment variables
- Main script comparing documents with/without summaries enabled
- Detailed metrics reporting for parse, build, and enhance stages
- LLM call statistics and token usage analysis
- Node processing and indexing success metrics
This helps users understand how different IndexOptions affect
pipeline performance and resource utilization.
---
examples/index_metrics/README.md | 42 ++++++
examples/index_metrics/main.py | 236 +++++++++++++++++++++++++++++++
2 files changed, 278 insertions(+)
create mode 100644 examples/index_metrics/README.md
create mode 100644 examples/index_metrics/main.py
diff --git a/examples/index_metrics/README.md b/examples/index_metrics/README.md
new file mode 100644
index 00000000..78bdd552
--- /dev/null
+++ b/examples/index_metrics/README.md
@@ -0,0 +1,42 @@
+# IndexMetrics Example
+
+Demonstrates how to inspect detailed indexing pipeline metrics via `IndexMetrics`.
+
+`IndexMetrics` is attached to each `IndexItem` and provides:
+
+| Field | Description |
+|------------------------|----------------------------------------------|
+| `total_time_ms` | Total indexing time |
+| `parse_time_ms` | Document parsing stage duration |
+| `build_time_ms` | Tree building stage duration |
+| `enhance_time_ms` | Summary/enhancement stage duration |
+| `nodes_processed` | Number of tree nodes processed |
+| `summaries_generated` | Successfully generated summaries |
+| `summaries_failed` | Failed summary generations |
+| `llm_calls` | Total LLM API calls made |
+| `total_tokens_generated` | Total tokens produced by the LLM |
+| `topics_indexed` | Topics added to the reasoning index |
+| `keywords_indexed` | Keywords added to the reasoning index |
+
+This example compares documents indexed with and without summaries enabled
+to show how `IndexOptions` affect pipeline stages and LLM usage.
+
+## Setup
+
+```bash
+pip install vectorless
+```
+
+## Run
+
+```bash
+python main.py
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|------------------------|----------------------|-----------|
+| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
+| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
+| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
diff --git a/examples/index_metrics/main.py b/examples/index_metrics/main.py
new file mode 100644
index 00000000..3bff91cb
--- /dev/null
+++ b/examples/index_metrics/main.py
@@ -0,0 +1,236 @@
+"""
+IndexMetrics example -- demonstrates inspecting detailed indexing pipeline metrics.
+
+IndexMetrics exposes timing, node processing, LLM usage, and reasoning index
+statistics for each indexed document. This example compares two documents with
+different IndexOptions to show how options affect the pipeline.
+
+Usage:
+ pip install vectorless
+ python main.py
+"""
+
+import asyncio
+import os
+
+from vectorless import (
+ Engine,
+ IndexContext,
+ IndexItem,
+ IndexMetrics,
+ IndexOptions,
+ VectorlessError,
+)
+
+# --- Configuration ---
+API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
+MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
+ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)
+WORKSPACE = "./workspace"
+
+# --- Sample documents with varying complexity ---
+SIMPLE_DOC = """\
+# Quick Note
+
+This is a short note about caching strategies.
+Redis is commonly used as an in-memory cache.
+"""
+
+COMPLEX_DOC = """\
+# Distributed Systems Design Guide
+
+## Consensus
+
+Raft is a consensus algorithm designed to be easy to understand.
+It elects a leader via randomized timeouts and replicates log entries
+to a majority of followers before committing them.
+
+## Replication
+
+State machine replication ensures that all replicas execute the same
+commands in the same order. Primary-backup replication is simpler but
+provides lower availability during leader failover.
+
+## Partitioning
+
+Consistent hashing distributes keys across nodes with minimal
+remapping when the cluster size changes. Virtual nodes improve balance
+when the key space is small.
+
+## Failure Detection
+
+Phi accrual failure detection treats failure as a continuous suspicion
+level rather than a binary alive/dead state. This reduces false
+positives during transient network issues.
+"""
+
+
+def print_pipeline_breakdown(m: IndexMetrics) -> None:
+ """Print a breakdown of pipeline stages and their percentages."""
+ total = m.total_time_ms
+ if total == 0:
+ print(" (no timing data)")
+ return
+
+ parse_pct = m.parse_time_ms / total * 100
+ build_pct = m.build_time_ms / total * 100
+ enhance_pct = m.enhance_time_ms / total * 100
+ other_pct = max(0, 100 - parse_pct - build_pct - enhance_pct)
+
+ print(f" Parse: {m.parse_time_ms:>5} ms ({parse_pct:5.1f}%)")
+ print(f" Build: {m.build_time_ms:>5} ms ({build_pct:5.1f}%)")
+ print(f" Enhance: {m.enhance_time_ms:>5} ms ({enhance_pct:5.1f}%)")
+ print(f" Other: {total - m.parse_time_ms - m.build_time_ms - m.enhance_time_ms:>5} ms ({other_pct:5.1f}%)")
+
+
+def print_llm_stats(m: IndexMetrics) -> None:
+ """Print LLM utilization statistics."""
+ print(f" LLM calls: {m.llm_calls}")
+ print(f" Tokens generated: {m.total_tokens_generated}")
+ if m.llm_calls > 0:
+ avg_tokens = m.total_tokens_generated / m.llm_calls
+ print(f" Avg tokens/call: {avg_tokens:.0f}")
+
+
+def print_summary_stats(m: IndexMetrics) -> None:
+ """Print summary generation success/failure."""
+ total = m.summaries_generated + m.summaries_failed
+ print(f" Summaries ok: {m.summaries_generated}")
+ print(f" Summaries failed: {m.summaries_failed}")
+ if total > 0:
+ success_rate = m.summaries_generated / total * 100
+ print(f" Success rate: {success_rate:.1f}%")
+
+
+def print_reasoning_index(m: IndexMetrics) -> None:
+ """Print reasoning index statistics."""
+ print(f" Nodes processed: {m.nodes_processed}")
+ print(f" Topics indexed: {m.topics_indexed}")
+ print(f" Keywords indexed: {m.keywords_indexed}")
+
+
+def print_full_report(item: IndexItem) -> None:
+ """Print a full metrics report for an indexed item."""
+ m = item.metrics
+ print(f" Document: {item.name} ({item.format})")
+ if m is None:
+ print(" (no metrics)")
+ return
+
+ print(f" Total time: {m.total_time_ms} ms")
+ print(f" repr: {repr(m)}")
+
+ print()
+ print(" Pipeline stages:")
+ print_pipeline_breakdown(m)
+
+ print()
+ print(" LLM usage:")
+ print_llm_stats(m)
+
+ print()
+ print(" Summary generation:")
+ print_summary_stats(m)
+
+ print()
+ print(" Reasoning index:")
+ print_reasoning_index(m)
+
+
+async def main() -> None:
+ engine = Engine(
+ workspace=WORKSPACE,
+ api_key=API_KEY,
+ model=MODEL,
+ endpoint=ENDPOINT,
+ )
+
+ # ================================================================
+ # 1. Index a simple document WITHOUT summaries
+ # ================================================================
+ print("=" * 55)
+ print(" Run 1: Simple doc, summaries OFF")
+ print("=" * 55)
+
+ opts_no_summary = IndexOptions(
+ generate_summaries=False,
+ generate_description=False,
+ )
+ result = await engine.index(
+ IndexContext.from_content(SIMPLE_DOC, "markdown")
+ .with_name("simple_no_summary")
+ .with_options(opts_no_summary)
+ )
+ item = result.items[0]
+ print_full_report(item)
+    m_run1 = item.metrics
+ print()
+
+ # ================================================================
+ # 2. Index the same simple document WITH summaries
+ # ================================================================
+ print("=" * 55)
+ print(" Run 2: Simple doc, summaries ON")
+ print("=" * 55)
+
+ opts_with_summary = IndexOptions(
+ generate_summaries=True,
+ generate_description=True,
+ )
+ result = await engine.index(
+ IndexContext.from_content(SIMPLE_DOC, "markdown")
+ .with_name("simple_with_summary")
+ .with_options(opts_with_summary)
+ )
+ item = result.items[0]
+ print_full_report(item)
+    m_run2 = item.metrics
+ print()
+
+    # ================================================================
+    # 3. Compare: summaries OFF vs ON for the simple doc
+    # ================================================================
+    if m_run1 and m_run2:
+        extra = m_run2.llm_calls - m_run1.llm_calls
+        print(f"  Summaries ON: +{extra} LLM call(s), +{m_run2.total_time_ms - m_run1.total_time_ms} ms\n")
+
+ # ================================================================
+ # 4. Index a complex document WITH summaries
+ # ================================================================
+ print("=" * 55)
+ print(" Run 3: Complex doc, summaries ON")
+ print("=" * 55)
+
+ result = await engine.index(
+ IndexContext.from_content(COMPLEX_DOC, "markdown")
+ .with_name("complex_with_summary")
+ .with_options(opts_with_summary)
+ )
+ item = result.items[0]
+ print_full_report(item)
+ doc_id_3 = item.doc_id
+ print()
+
+ # ================================================================
+ # 5. Summary table
+ # ================================================================
+ print("=" * 55)
+ print(" Comparison table")
+ print("=" * 55)
+
+ docs = await engine.list()
+ for doc in docs:
+ print(f" {doc.name:<30} id={doc.id[:8]}...")
+ if doc.description:
+ print(f" description: {doc.description[:80]}")
+
+ # ================================================================
+ # Cleanup
+ # ================================================================
+ print()
+ cleared = await engine.clear()
+ print(f"Cleaned up {cleared} document(s).")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
From c45f637812dee36f34c782f9b340c81a4391d697 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Mon, 13 Apr 2026 20:05:46 +0800
Subject: [PATCH 5/5] chore(release): bump version from 0.1.25 to 0.1.26 in
Cargo.toml
Bump workspace package version from 0.1.25 to 0.1.26 in Cargo.toml
to prepare for new release.
chore(release): bump version from 0.1.4 to 0.1.5 in pyproject.toml
Bump python package version from 0.1.4 to 0.1.5 in pyproject.toml
to prepare for new release.
---
Cargo.toml | 2 +-
pyproject.toml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 6eeec14e..f94e1c2b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ members = ["rust", "python"]
resolver = "2"
[workspace.package]
-version = "0.1.25"
+version = "0.1.26"
edition = "2024"
authors = ["zTgx "]
license = "Apache-2.0"
diff --git a/pyproject.toml b/pyproject.toml
index 23bffc7d..4951e3e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
[project]
name = "vectorless"
-version = "0.1.4"
+version = "0.1.5"
description = "Hierarchical document intelligence without vectors"
readme = "README.md"
requires-python = ">=3.9"