From a35cd4faacc24173122db772401072671af02fd3 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 15 Apr 2026 07:27:28 +0800 Subject: [PATCH 1/6] refactor(python): reorder imports and improve code formatting Reorder import statements to follow logical grouping and update format! macro calls to use multi-line formatting for better readability. style(rust): clean up unused imports and improve code formatting Remove unused RefType import and reformat function calls to use multi-line syntax for better readability. Also clean up unnecessary imports in retrieval strategy module. refactor(index): reorganize module structure Move ReasoningIndexConfig import after other imports to maintain consistent ordering in the indexer module. refactor(retrieval): remove unused DocumentEntry export Remove unused DocumentEntry re-export from retrieval strategy module as it's not being used externally. --- python/src/lib.rs | 62 +++++++++++++++-------------- rust/src/client/indexer.rs | 4 +- rust/src/index/mod.rs | 2 +- rust/src/index/stages/enrich.rs | 21 +++++++--- rust/src/index/stages/reasoning.rs | 7 ++-- rust/src/retrieval/stages/search.rs | 11 ++--- rust/src/retrieval/strategy/mod.rs | 2 +- 7 files changed, 61 insertions(+), 48 deletions(-) diff --git a/python/src/lib.rs b/python/src/lib.rs index 45ac87fb..9cfef0af 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -3,19 +3,19 @@ //! Python bindings for vectorless. 
-use pyo3::prelude::*; use pyo3::exceptions::PyException; +use pyo3::prelude::*; use pyo3_async_runtimes::tokio::future_into_py; use std::sync::Arc; use tokio::runtime::Runtime; +use ::vectorless::StrategyPreference; use ::vectorless::client::{ DocumentFormat, DocumentInfo, Engine, EngineBuilder, FailedItem, IndexContext, IndexItem, IndexMode, IndexOptions, IndexResult, QueryContext, QueryResult, QueryResultItem, }; use ::vectorless::error::Error as RustError; use ::vectorless::metrics::IndexMetrics; -use ::vectorless::StrategyPreference; // ============================================================ // Error Types @@ -135,9 +135,12 @@ impl PyIndexOptions { "incremental" => opts = opts.with_mode(IndexMode::Incremental), _ => { return Err(PyErr::from(VectorlessError::new( - format!("Unknown mode: {}. Supported: default, force, incremental", mode), + format!( + "Unknown mode: {}. Supported: default, force, incremental", + mode + ), "config", - ))) + ))); } } opts.generate_summaries = generate_summaries; @@ -259,9 +262,12 @@ impl PyIndexContext { "incremental" => IndexMode::Incremental, _ => { return Err(PyErr::from(VectorlessError::new( - format!("Unknown mode: {}. Supported: default, force, incremental", mode), + format!( + "Unknown mode: {}. Supported: default, force, incremental", + mode + ), "config", - ))) + ))); } }; let ctx = self.inner.clone().with_mode(m); @@ -529,17 +535,15 @@ impl PyQueryResult { self.inner .items .iter() - .map(|i| PyQueryResultItem { - inner: i.clone(), - }) + .map(|i| PyQueryResultItem { inner: i.clone() }) .collect() } /// Get the first (single-doc) result item. fn single(&self) -> Option { - self.inner.single().map(|i| PyQueryResultItem { - inner: i.clone(), - }) + self.inner + .single() + .map(|i| PyQueryResultItem { inner: i.clone() }) } /// Number of result items. @@ -700,7 +704,10 @@ impl PyIndexItem { /// Indexing pipeline metrics (timing, LLM usage, etc.). 
#[getter] fn metrics(&self) -> Option { - self.inner.metrics.as_ref().map(|m| PyIndexMetrics { inner: m.clone() }) + self.inner + .metrics + .as_ref() + .map(|m| PyIndexMetrics { inner: m.clone() }) } fn __repr__(&self) -> String { @@ -823,7 +830,9 @@ impl PyDocumentInfo { // DocumentGraph types // ============================================================ -use ::vectorless::graph::{DocumentGraph, DocumentGraphNode, EdgeEvidence, GraphEdge, WeightedKeyword}; +use ::vectorless::graph::{ + DocumentGraph, DocumentGraphNode, EdgeEvidence, GraphEdge, WeightedKeyword, +}; /// A keyword with weight from document analysis. #[pyclass(name = "WeightedKeyword")] @@ -844,7 +853,10 @@ impl PyWeightedKeyword { } fn __repr__(&self) -> String { - format!("WeightedKeyword('{}', weight={:.2})", self.inner.keyword, self.inner.weight) + format!( + "WeightedKeyword('{}', weight={:.2})", + self.inner.keyword, self.inner.weight + ) } } @@ -956,9 +968,7 @@ impl PyDocumentGraphNode { self.inner .top_keywords .iter() - .map(|kw| PyWeightedKeyword { - inner: kw.clone(), - }) + .map(|kw| PyWeightedKeyword { inner: kw.clone() }) .collect() } @@ -993,9 +1003,9 @@ impl PyDocumentGraph { /// Get a document node by ID. fn get_node(&self, doc_id: String) -> Option { - self.inner.get_node(&doc_id).map(|n| PyDocumentGraphNode { - inner: n.clone(), - }) + self.inner + .get_node(&doc_id) + .map(|n| PyDocumentGraphNode { inner: n.clone() }) } /// Get all document IDs in the graph. @@ -1008,9 +1018,7 @@ impl PyDocumentGraph { self.inner .get_neighbors(&doc_id) .iter() - .map(|e| PyGraphEdge { - inner: e.clone(), - }) + .map(|e| PyGraphEdge { inner: e.clone() }) .collect() } @@ -1186,11 +1194,7 @@ impl PyEngine { /// /// Raises: /// VectorlessError: If query fails. 
- fn query<'py>( - &self, - py: Python<'py>, - ctx: &PyQueryContext, - ) -> PyResult> { + fn query<'py>(&self, py: Python<'py>, ctx: &PyQueryContext) -> PyResult> { let engine = Arc::clone(&self.inner); let query_ctx = ctx.inner.clone(); future_into_py(py, run_query(engine, query_ctx)) diff --git a/rust/src/client/indexer.rs b/rust/src/client/indexer.rs index 490e8a78..4d9dd6f0 100644 --- a/rust/src/client/indexer.rs +++ b/rust/src/client/indexer.rs @@ -28,7 +28,9 @@ use uuid::Uuid; use crate::error::{Error, Result}; use crate::index::parse::DocumentFormat; -use crate::index::{IndexInput, IndexMode, PipelineExecutor, PipelineOptions, ReasoningIndexConfig, SummaryStrategy}; +use crate::index::{ + IndexInput, IndexMode, PipelineExecutor, PipelineOptions, ReasoningIndexConfig, SummaryStrategy, +}; use crate::llm::LlmClient; use crate::storage::{DocumentMeta, PersistedDocument}; diff --git a/rust/src/index/mod.rs b/rust/src/index/mod.rs index 512f93c2..89e23403 100644 --- a/rust/src/index/mod.rs +++ b/rust/src/index/mod.rs @@ -63,8 +63,8 @@ pub mod summary; pub use pipeline::{IndexInput, IndexMetrics, PipelineExecutor, PipelineResult}; // Re-export config types -pub use config::{IndexMode, PipelineOptions, ThinningConfig}; pub use crate::document::ReasoningIndexConfig; +pub use config::{IndexMode, PipelineOptions, ThinningConfig}; // Re-export summary pub use summary::SummaryStrategy; diff --git a/rust/src/index/stages/enrich.rs b/rust/src/index/stages/enrich.rs index 29fe55ab..1f6456ad 100644 --- a/rust/src/index/stages/enrich.rs +++ b/rust/src/index/stages/enrich.rs @@ -7,7 +7,7 @@ use super::async_trait; use std::time::Instant; use tracing::info; -use crate::document::{DocumentTree, NodeId, RefType, ReferenceExtractor, TocView}; +use crate::document::{DocumentTree, NodeId, ReferenceExtractor, TocView}; use crate::error::Result; use super::{AccessPattern, IndexStage, StageResult}; @@ -105,7 +105,10 @@ impl EnrichStage { let mut total_resolved = 0; for node_id in 
node_ids { - let content = tree.get(node_id).map(|n| n.content.clone()).unwrap_or_default(); + let content = tree + .get(node_id) + .map(|n| n.content.clone()) + .unwrap_or_default(); if content.is_empty() { continue; } @@ -204,9 +207,10 @@ impl IndexStage for EnrichStage { stage_result .metadata .insert("node_count".to_string(), serde_json::json!(node_count)); - stage_result - .metadata - .insert("resolved_references".to_string(), serde_json::json!(resolved_refs)); + stage_result.metadata.insert( + "resolved_references".to_string(), + serde_json::json!(resolved_refs), + ); Ok(stage_result) } @@ -215,13 +219,18 @@ impl IndexStage for EnrichStage { #[cfg(test)] mod tests { use super::*; + use crate::document::RefType; #[test] fn test_resolve_references_section_ref() { let mut tree = DocumentTree::new("Root", "root content"); let s1 = tree.add_child(tree.root(), "Introduction", "Introduction text."); tree.set_structure(s1, "1"); - let s2 = tree.add_child(tree.root(), "Details", "For details, see Section 1 for more info"); + let s2 = tree.add_child( + tree.root(), + "Details", + "For details, see Section 1 for more info", + ); tree.set_structure(s2, "2"); let resolved = EnrichStage::resolve_references(&mut tree); diff --git a/rust/src/index/stages/reasoning.rs b/rust/src/index/stages/reasoning.rs index 0a7d1711..c85a175b 100644 --- a/rust/src/index/stages/reasoning.rs +++ b/rust/src/index/stages/reasoning.rs @@ -406,9 +406,10 @@ impl IndexStage for ReasoningIndexStage { stage_result .metadata .insert("topics_indexed".to_string(), serde_json::json!(topic_count)); - stage_result - .metadata - .insert("synonyms_expanded".to_string(), serde_json::json!(synonym_count)); + stage_result.metadata.insert( + "synonyms_expanded".to_string(), + serde_json::json!(synonym_count), + ); Ok(stage_result) } diff --git a/rust/src/retrieval/stages/search.rs b/rust/src/retrieval/stages/search.rs index bdc0d9c7..2e90a57e 100644 --- a/rust/src/retrieval/stages/search.rs +++ 
b/rust/src/retrieval/stages/search.rs @@ -27,8 +27,8 @@ use crate::retrieval::search::{ SearchTree, ToCNavigator, }; use crate::retrieval::strategy::{ - CrossDocumentConfig, CrossDocumentStrategy, DocumentEntry, HybridConfig, HybridStrategy, - KeywordStrategy, LlmStrategy, RetrievalStrategy, + CrossDocumentConfig, CrossDocumentStrategy, HybridConfig, HybridStrategy, KeywordStrategy, + LlmStrategy, RetrievalStrategy, }; use crate::retrieval::types::{ NavigationDecision, ReasoningCandidate, ReasoningStep, StageName, StrategyPreference, @@ -155,8 +155,7 @@ impl SearchStage { } StrategyPreference::ForceCrossDocument => { // Build a CrossDocumentStrategy with graph-based boosting - let inner: Box = - Box::new(self.keyword_strategy.clone()); + let inner: Box = Box::new(self.keyword_strategy.clone()); let cross_doc = CrossDocumentStrategy::new(inner).with_config(CrossDocumentConfig::default()); @@ -180,9 +179,7 @@ impl SearchStage { info!("Using Hybrid strategy as fallback for ForcePageRange"); strategy.clone() } else { - warn!( - "ForcePageRange requires special configuration, falling back to Keyword" - ); + warn!("ForcePageRange requires special configuration, falling back to Keyword"); Arc::new(self.keyword_strategy.clone()) } } diff --git a/rust/src/retrieval/strategy/mod.rs b/rust/src/retrieval/strategy/mod.rs index 19f9ac38..ebf042dc 100644 --- a/rust/src/retrieval/strategy/mod.rs +++ b/rust/src/retrieval/strategy/mod.rs @@ -18,7 +18,7 @@ mod llm; mod page_range; mod r#trait; -pub use cross_document::{CrossDocumentConfig, CrossDocumentStrategy, DocumentEntry}; +pub use cross_document::{CrossDocumentConfig, CrossDocumentStrategy}; pub use hybrid::{HybridConfig, HybridStrategy}; pub use keyword::KeywordStrategy; pub use llm::LlmStrategy; From 758ae11cba4e0b119357a6586d4eb40c93a87626 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 15 Apr 2026 08:55:59 +0800 Subject: [PATCH 2/6] chore(release): bump version to 0.1.27 for Rust and 0.1.6 for Python - 
Update workspace package version from 0.1.26 to 0.1.27 in Cargo.toml - Update Python package version from 0.1.5 to 0.1.6 in pyproject.toml --- Cargo.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f94e1c2b..ef9c22b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["rust", "python"] resolver = "2" [workspace.package] -version = "0.1.26" +version = "0.1.27" edition = "2024" authors = ["zTgx "] license = "Apache-2.0" diff --git a/pyproject.toml b/pyproject.toml index 4951e3e5..8bc47032 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "vectorless" -version = "0.1.5" +version = "0.1.6" description = "Hierarchical document intelligence without vectors" readme = "README.md" requires-python = ">=3.9" From b1345d63e84d0a30fe745d320695ad7d0c274f72 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 15 Apr 2026 09:44:52 +0800 Subject: [PATCH 3/6] refactor(rust): remove unused variable warnings and improve code quality - Replace unused variables with underscore prefix to eliminate compiler warnings across multiple modules including workspace.rs, emitter.rs, executor.rs, aggregator.rs, llm_pilot.rs, reference.rs, retriever.rs, beam.rs, evaluate.rs, hybrid.rs, and threshold.rs - Remove overly permissive clippy allowances in lib.rs and replace with specific lint configurations for better code quality - Add SearchAlgorithm and QueryComplexity exports to main library interface - Include ReasoningIndexConfig in document type exports - These changes clean up code by addressing unused variable warnings while maintaining all existing functionality --- rust/src/client/workspace.rs | 2 +- rust/src/events/emitter.rs | 2 +- rust/src/lib.rs | 11 +++++------ rust/src/llm/executor.rs | 2 +- rust/src/retrieval/content/aggregator.rs | 2 +- rust/src/retrieval/pilot/llm_pilot.rs | 4 ++-- rust/src/retrieval/reference.rs | 4 ++-- 
rust/src/retrieval/retriever.rs | 2 +- rust/src/retrieval/search/beam.rs | 2 +- rust/src/retrieval/stages/evaluate.rs | 2 +- rust/src/retrieval/strategy/hybrid.rs | 2 +- rust/src/retrieval/sufficiency/threshold.rs | 2 +- 12 files changed, 18 insertions(+), 19 deletions(-) diff --git a/rust/src/client/workspace.rs b/rust/src/client/workspace.rs index d23c37bb..061533ed 100644 --- a/rust/src/client/workspace.rs +++ b/rust/src/client/workspace.rs @@ -135,7 +135,7 @@ impl WorkspaceClient { let doc = self.workspace.load_and_cache(doc_id).await?; let cache_hit = doc.is_some(); - if let Some(ref doc) = doc { + if let Some(ref _doc) = doc { debug!("Loaded document: {} (cache={})", doc_id, cache_hit); } diff --git a/rust/src/events/emitter.rs b/rust/src/events/emitter.rs index 42753b5b..c54efa94 100644 --- a/rust/src/events/emitter.rs +++ b/rust/src/events/emitter.rs @@ -135,7 +135,7 @@ impl EventEmitter { for handler in &inner.index_handlers { handler(&event); } - for handler in &inner.async_handlers { + for _handler in &inner.async_handlers { // For sync context, we just log async handlers let event = Event::Index(event.clone()); info!("Async event: {:?}", event); diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ea1d79d6..1363b5b7 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,10 +3,7 @@ //! 
# Vectorless -// Clippy: allow some pedantic lints that are too noisy for early-stage project -#![allow(clippy::all)] -#![allow(dead_code)] -#![allow(unused_variables)] +// Clippy: allow specific lints that are too noisy for this project #![allow(clippy::iter_over_hash_type)] #![allow(clippy::large_enum_variant)] #![allow(clippy::manual_unwrap_or_default)] @@ -69,14 +66,16 @@ pub use client::{ // Retrieval types pub use retrieval::StrategyPreference; +pub use retrieval::pipeline::SearchAlgorithm; +pub use retrieval::QueryComplexity; // Error types pub use error::{Error, Result}; // Document types pub use document::{ - DocumentStructure, DocumentTree, NodeId, StructureNode, TocConfig, TocEntry, TocNode, TocView, - TreeNode, + DocumentStructure, DocumentTree, NodeId, ReasoningIndexConfig, StructureNode, TocConfig, + TocEntry, TocNode, TocView, TreeNode, }; // Graph types diff --git a/rust/src/llm/executor.rs b/rust/src/llm/executor.rs index d13e24fc..8498062b 100644 --- a/rust/src/llm/executor.rs +++ b/rust/src/llm/executor.rs @@ -353,7 +353,7 @@ impl LlmExecutor { let truncated = self.truncate_prompt(user); // Build request based on whether max_tokens is specified - let request = if let Some(tokens) = max_tokens { + let request = if let Some(_tokens) = max_tokens { CreateChatCompletionRequestArgs::default() .model(model) .messages([ diff --git a/rust/src/retrieval/content/aggregator.rs b/rust/src/retrieval/content/aggregator.rs index be7028ae..7e1ace2d 100644 --- a/rust/src/retrieval/content/aggregator.rs +++ b/rust/src/retrieval/content/aggregator.rs @@ -104,7 +104,7 @@ impl ContentAggregator { tree: &DocumentTree, query: &str, ) -> AggregationResult { - let start = std::time::Instant::now(); + let _start = std::time::Instant::now(); // Step 1: Collect all content chunks from candidates and their descendants let chunks = self.collect_chunks(candidates, tree); diff --git a/rust/src/retrieval/pilot/llm_pilot.rs b/rust/src/retrieval/pilot/llm_pilot.rs index 
88e2ee03..e252da96 100644 --- a/rust/src/retrieval/pilot/llm_pilot.rs +++ b/rust/src/retrieval/pilot/llm_pilot.rs @@ -269,9 +269,9 @@ impl LlmPilot { fn compute_cache_key( &self, context: &super::builder::PilotContext, - point: InterventionPoint, + _point: InterventionPoint, ) -> Option { - let store = self.memo_store.as_ref()?; + let _store = self.memo_store.as_ref()?; // Build a fingerprint from the context using available methods let context_str = context.to_string(); diff --git a/rust/src/retrieval/reference.rs b/rust/src/retrieval/reference.rs index 523fd042..cf681f6b 100644 --- a/rust/src/retrieval/reference.rs +++ b/rust/src/retrieval/reference.rs @@ -220,7 +220,7 @@ impl ReferenceFollower { }; // Use pre-extracted references if available, otherwise extract - let refs = if !node.references.is_empty() { + let _refs = if !node.references.is_empty() { node.references.clone() } else { ReferenceExtractor::extract(&node.content) @@ -325,7 +325,7 @@ impl ReferenceFollower { // Get references from this node if let Some(node) = tree.get(node_id) { - let refs = if !node.references.is_empty() { + let _refs = if !node.references.is_empty() { node.references.clone() } else { ReferenceExtractor::extract(&node.content) diff --git a/rust/src/retrieval/retriever.rs b/rust/src/retrieval/retriever.rs index 97c280c0..1e37d7a0 100644 --- a/rust/src/retrieval/retriever.rs +++ b/rust/src/retrieval/retriever.rs @@ -81,7 +81,7 @@ pub trait Retriever: Send + Sync { /// /// Returns an estimated number of LLM calls or tokens that will be used. /// Useful for cost-aware strategy selection. 
- fn estimate_cost(&self, tree: &DocumentTree, options: &RetrieveOptions) -> CostEstimate { + fn estimate_cost(&self, tree: &DocumentTree, _options: &RetrieveOptions) -> CostEstimate { let node_count = tree.node_count(); CostEstimate { llm_calls: node_count / 2, // Rough estimate diff --git a/rust/src/retrieval/search/beam.rs b/rust/src/retrieval/search/beam.rs index 6181e1c7..977fba9b 100644 --- a/rust/src/retrieval/search/beam.rs +++ b/rust/src/retrieval/search/beam.rs @@ -134,7 +134,7 @@ impl BeamSearch { tree: &DocumentTree, context: &RetrievalContext, pilot: Option<&dyn Pilot>, - cache: &PilotDecisionCache, + _cache: &PilotDecisionCache, visited: &HashSet, fallback_stack: &mut Vec, result: &mut SearchResult, diff --git a/rust/src/retrieval/stages/evaluate.rs b/rust/src/retrieval/stages/evaluate.rs index d0d51bef..d3dc2ee4 100644 --- a/rust/src/retrieval/stages/evaluate.rs +++ b/rust/src/retrieval/stages/evaluate.rs @@ -410,7 +410,7 @@ impl RetrievalStage for EvaluateStage { let doc_key = format!("{:?}", ctx.tree.root()); for candidate in ctx.candidates.iter().take(3) { if let Some(node) = ctx.tree.get(candidate.node_id) { - let path = format!("{}", node.depth); + let _path = format!("{}", node.depth); // Use the node title as path identifier for L2 ctx.reasoning_cache .l2_record(&doc_key, &node.title, candidate.score); diff --git a/rust/src/retrieval/strategy/hybrid.rs b/rust/src/retrieval/strategy/hybrid.rs index 74484efa..37c7f5fc 100644 --- a/rust/src/retrieval/strategy/hybrid.rs +++ b/rust/src/retrieval/strategy/hybrid.rs @@ -353,7 +353,7 @@ impl RetrievalStrategy for HybridStrategy { // Map LLM results back with combined scores let mut llm_iter = llm_results.into_iter(); - for (idx, node_id, bm25_score) in &needs_llm { + for (idx, _node_id, bm25_score) in &needs_llm { if candidate_indices.contains(idx) { if let Some(llm_eval) = llm_iter.next() { let combined_score = self.combine_scores(*bm25_score, llm_eval.score); diff --git 
a/rust/src/retrieval/sufficiency/threshold.rs b/rust/src/retrieval/sufficiency/threshold.rs index 30af9197..9fb48f8e 100644 --- a/rust/src/retrieval/sufficiency/threshold.rs +++ b/rust/src/retrieval/sufficiency/threshold.rs @@ -104,7 +104,7 @@ impl Default for ThresholdChecker { } impl SufficiencyChecker for ThresholdChecker { - fn check(&self, query: &str, content: &str, token_count: usize) -> SufficiencyLevel { + fn check(&self, _query: &str, content: &str, token_count: usize) -> SufficiencyLevel { let estimated_tokens = if token_count == 0 { self.estimate_tokens(content) } else { From ddaf01a69b3a06c88384c1f338375c4cfabb0c42 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 15 Apr 2026 09:54:16 +0800 Subject: [PATCH 4/6] feat: add runtime metrics reports for LLM, Pilot, and Retrieval - Add LlmMetricsReport with total calls, token usage, success rates, latency, and estimated costs - Add PilotMetricsReport with decision accuracy, guidance calls, and intervention tracking - Add RetrievalMetricsReport with query performance, cache metrics, and path quality statistics - Add combined MetricsReport to aggregate all subsystem metrics - Implement metrics hub for centralized metric collection - Expose metrics_report() method in Python bindings --- python/src/lib.rs | 391 ++++++++++++++++++++++++++++++++++++++ rust/src/client/engine.rs | 14 ++ rust/src/lib.rs | 3 + rust/src/metrics/mod.rs | 4 + 4 files changed, 412 insertions(+) diff --git a/python/src/lib.rs b/python/src/lib.rs index 9cfef0af..a2fe5bb3 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -16,6 +16,7 @@ use ::vectorless::client::{ }; use ::vectorless::error::Error as RustError; use ::vectorless::metrics::IndexMetrics; +use ::vectorless::metrics::{LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport}; // ============================================================ // Error Types @@ -664,6 +665,378 @@ impl PyIndexMetrics { } } +// 
============================================================ +// Runtime Metrics Reports +// ============================================================ + +/// LLM usage metrics report. +#[pyclass(name = "LlmMetricsReport")] +pub struct PyLlmMetricsReport { + inner: LlmMetricsReport, +} + +#[pymethods] +impl PyLlmMetricsReport { + /// Total number of LLM calls. + #[getter] + fn total_calls(&self) -> u64 { + self.inner.total_calls + } + + /// Number of successful calls. + #[getter] + fn successful_calls(&self) -> u64 { + self.inner.successful_calls + } + + /// Number of failed calls. + #[getter] + fn failed_calls(&self) -> u64 { + self.inner.failed_calls + } + + /// Success rate (0.0 - 1.0). + #[getter] + fn success_rate(&self) -> f64 { + self.inner.success_rate + } + + /// Total input tokens. + #[getter] + fn total_input_tokens(&self) -> u64 { + self.inner.total_input_tokens + } + + /// Total output tokens. + #[getter] + fn total_output_tokens(&self) -> u64 { + self.inner.total_output_tokens + } + + /// Total tokens (input + output). + #[getter] + fn total_tokens(&self) -> u64 { + self.inner.total_tokens + } + + /// Average latency per call in milliseconds. + #[getter] + fn avg_latency_ms(&self) -> f64 { + self.inner.avg_latency_ms + } + + /// Total latency in milliseconds. + #[getter] + fn total_latency_ms(&self) -> u64 { + self.inner.total_latency_ms + } + + /// Estimated cost in USD. + #[getter] + fn estimated_cost_usd(&self) -> f64 { + self.inner.estimated_cost_usd + } + + /// Number of rate limit errors. + #[getter] + fn rate_limit_errors(&self) -> u64 { + self.inner.rate_limit_errors + } + + /// Number of timeout errors. + #[getter] + fn timeout_errors(&self) -> u64 { + self.inner.timeout_errors + } + + /// Number of fallback triggers. 
+ #[getter] + fn fallback_triggers(&self) -> u64 { + self.inner.fallback_triggers + } + + fn __repr__(&self) -> String { + format!( + "LlmMetricsReport(calls={}, tokens={}, cost=${:.4})", + self.inner.total_calls, + self.inner.total_tokens, + self.inner.estimated_cost_usd, + ) + } +} + +/// Pilot decision metrics report. +#[pyclass(name = "PilotMetricsReport")] +pub struct PyPilotMetricsReport { + inner: PilotMetricsReport, +} + +#[pymethods] +impl PyPilotMetricsReport { + /// Total number of Pilot decisions. + #[getter] + fn total_decisions(&self) -> u64 { + self.inner.total_decisions + } + + /// Number of start guidance calls. + #[getter] + fn start_guidance_calls(&self) -> u64 { + self.inner.start_guidance_calls + } + + /// Number of fork decisions. + #[getter] + fn fork_decisions(&self) -> u64 { + self.inner.fork_decisions + } + + /// Number of backtrack calls. + #[getter] + fn backtrack_calls(&self) -> u64 { + self.inner.backtrack_calls + } + + /// Number of evaluate calls. + #[getter] + fn evaluate_calls(&self) -> u64 { + self.inner.evaluate_calls + } + + /// Decision accuracy based on feedback (0.0 - 1.0). + #[getter] + fn accuracy(&self) -> f64 { + self.inner.accuracy + } + + /// Number of correct decisions. + #[getter] + fn correct_decisions(&self) -> u64 { + self.inner.correct_decisions + } + + /// Number of incorrect decisions. + #[getter] + fn incorrect_decisions(&self) -> u64 { + self.inner.incorrect_decisions + } + + /// Average confidence across all decisions. + #[getter] + fn avg_confidence(&self) -> f64 { + self.inner.avg_confidence + } + + /// Number of LLM calls made by Pilot. + #[getter] + fn llm_calls(&self) -> u64 { + self.inner.llm_calls + } + + /// Number of interventions. + #[getter] + fn interventions(&self) -> u64 { + self.inner.interventions + } + + /// Number of skipped interventions. + #[getter] + fn skipped_interventions(&self) -> u64 { + self.inner.skipped_interventions + } + + /// Number of budget exhausted events. 
+ #[getter] + fn budget_exhausted(&self) -> u64 { + self.inner.budget_exhausted + } + + /// Number of algorithm fallbacks. + #[getter] + fn algorithm_fallbacks(&self) -> u64 { + self.inner.algorithm_fallbacks + } + + fn __repr__(&self) -> String { + format!( + "PilotMetricsReport(decisions={}, accuracy={:.2}, avg_confidence={:.2})", + self.inner.total_decisions, self.inner.accuracy, self.inner.avg_confidence, + ) + } +} + +/// Retrieval operation metrics report. +#[pyclass(name = "RetrievalMetricsReport")] +pub struct PyRetrievalMetricsReport { + inner: RetrievalMetricsReport, +} + +#[pymethods] +impl PyRetrievalMetricsReport { + /// Total number of queries. + #[getter] + fn total_queries(&self) -> u64 { + self.inner.total_queries + } + + /// Total number of search iterations. + #[getter] + fn total_iterations(&self) -> u64 { + self.inner.total_iterations + } + + /// Average iterations per query. + #[getter] + fn avg_iterations(&self) -> f64 { + self.inner.avg_iterations + } + + /// Total nodes visited. + #[getter] + fn nodes_visited(&self) -> u64 { + self.inner.nodes_visited + } + + /// Total paths found. + #[getter] + fn paths_found(&self) -> u64 { + self.inner.paths_found + } + + /// Average path length. + #[getter] + fn avg_path_length(&self) -> f64 { + self.inner.avg_path_length + } + + /// Average path score (0.0 - 1.0). + #[getter] + fn avg_path_score(&self) -> f64 { + self.inner.avg_path_score + } + + /// Number of high-score paths (>= 0.5). + #[getter] + fn high_score_paths(&self) -> u64 { + self.inner.high_score_paths + } + + /// Number of low-score paths (< 0.3). + #[getter] + fn low_score_paths(&self) -> u64 { + self.inner.low_score_paths + } + + /// Number of cache hits. + #[getter] + fn cache_hits(&self) -> u64 { + self.inner.cache_hits + } + + /// Number of cache misses. + #[getter] + fn cache_misses(&self) -> u64 { + self.inner.cache_misses + } + + /// Cache hit rate (0.0 - 1.0). 
+ #[getter] + fn cache_hit_rate(&self) -> f64 { + self.inner.cache_hit_rate + } + + /// Total latency in milliseconds. + #[getter] + fn total_latency_ms(&self) -> u64 { + self.inner.total_latency_ms + } + + /// Average latency per query in milliseconds. + #[getter] + fn avg_latency_ms(&self) -> f64 { + self.inner.avg_latency_ms + } + + /// Number of backtracks. + #[getter] + fn backtracks(&self) -> u64 { + self.inner.backtracks + } + + /// Number of sufficiency checks. + #[getter] + fn sufficiency_checks(&self) -> u64 { + self.inner.sufficiency_checks + } + + /// Sufficiency rate (0.0 - 1.0). + #[getter] + fn sufficiency_rate(&self) -> f64 { + self.inner.sufficiency_rate + } + + fn __repr__(&self) -> String { + format!( + "RetrievalMetricsReport(queries={}, avg_score={:.2}, cache_hit={:.1}%)", + self.inner.total_queries, + self.inner.avg_path_score, + self.inner.cache_hit_rate * 100.0, + ) + } +} + +/// Complete metrics report combining all subsystem metrics. +#[pyclass(name = "MetricsReport")] +pub struct PyMetricsReport { + inner: MetricsReport, +} + +#[pymethods] +impl PyMetricsReport { + /// LLM metrics. + #[getter] + fn llm(&self) -> PyLlmMetricsReport { + PyLlmMetricsReport { + inner: self.inner.llm.clone(), + } + } + + /// Pilot metrics. + #[getter] + fn pilot(&self) -> PyPilotMetricsReport { + PyPilotMetricsReport { + inner: self.inner.pilot.clone(), + } + } + + /// Retrieval metrics. + #[getter] + fn retrieval(&self) -> PyRetrievalMetricsReport { + PyRetrievalMetricsReport { + inner: self.inner.retrieval.clone(), + } + } + + /// Total estimated cost in USD. + fn total_cost_usd(&self) -> f64 { + self.inner.total_cost_usd() + } + + /// Overall success rate (0.0 - 1.0). 
+ fn overall_success_rate(&self) -> f64 { + self.inner.overall_success_rate() + } + + fn __repr__(&self) -> String { + format!( + "MetricsReport(llm_calls={}, cost=${:.4}, queries={})", + self.inner.llm.total_calls, + self.inner.total_cost_usd(), + self.inner.retrieval.total_queries, + ) + } +} + // ============================================================ // IndexItem / IndexResult // ============================================================ @@ -1075,6 +1448,12 @@ async fn run_get_graph(engine: Arc) -> PyResult> Ok(graph.map(|g| PyDocumentGraph { inner: g })) } +fn run_metrics_report(engine: Arc) -> PyMetricsReport { + PyMetricsReport { + inner: engine.metrics_report(), + } +} + // ============================================================ // Engine // ============================================================ @@ -1242,6 +1621,14 @@ impl PyEngine { future_into_py(py, run_get_graph(engine)) } + /// Generate a complete metrics report. + /// + /// Returns: + /// MetricsReport with LLM, Pilot, and Retrieval metrics. 
+ fn metrics_report(&self) -> PyMetricsReport { + run_metrics_report(Arc::clone(&self.inner)) + } + fn __repr__(&self) -> String { "Engine(...)".to_string() } @@ -1280,6 +1667,10 @@ fn _vectorless(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; diff --git a/rust/src/client/engine.rs b/rust/src/client/engine.rs index c46fe7c8..94cbcfb4 100644 --- a/rust/src/client/engine.rs +++ b/rust/src/client/engine.rs @@ -45,6 +45,7 @@ use crate::config::Config; use crate::error::Result; use crate::index::PipelineOptions; use crate::index::incremental::{self, IndexAction}; +use crate::metrics::MetricsHub; use crate::retrieval::{PipelineRetriever, RetrieveEventReceiver}; use crate::storage::{PersistedDocument, Workspace}; use crate::{DocumentTree, Error}; @@ -85,6 +86,9 @@ pub struct Engine { /// Event emitter. events: EventEmitter, + + /// Central metrics hub for unified collection. + metrics_hub: Arc, } impl Engine { @@ -120,6 +124,7 @@ impl Engine { retriever, workspace: Some(workspace_client), events, + metrics_hub: Arc::new(MetricsHub::with_defaults()), }) } @@ -543,6 +548,14 @@ impl Engine { workspace.get_graph().await } + /// Generate a complete metrics report. + /// + /// Returns a [`MetricsReport`](crate::metrics::MetricsReport) containing + /// LLM usage, pilot decision, and retrieval operation metrics. 
+ pub fn metrics_report(&self) -> crate::metrics::MetricsReport { + self.metrics_hub.generate_report() + } + // ============================================================ // Internal // ============================================================ @@ -727,6 +740,7 @@ impl Clone for Engine { retriever: self.retriever.clone(), workspace: self.workspace.clone(), events: self.events.clone(), + metrics_hub: Arc::clone(&self.metrics_hub), } } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 1363b5b7..59756a37 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -86,3 +86,6 @@ pub use events::{EventEmitter, IndexEvent, QueryEvent, WorkspaceEvent}; // Index metrics pub use metrics::IndexMetrics; + +// Runtime metrics reports +pub use metrics::{LlmMetricsReport, MetricsReport, PilotMetricsReport, RetrievalMetricsReport}; diff --git a/rust/src/metrics/mod.rs b/rust/src/metrics/mod.rs index 258f235d..bc724988 100644 --- a/rust/src/metrics/mod.rs +++ b/rust/src/metrics/mod.rs @@ -55,4 +55,8 @@ mod llm; mod pilot; mod retrieval; +pub use hub::{MetricsHub, MetricsReport}; pub use index::IndexMetrics; +pub use llm::LlmMetricsReport; +pub use pilot::PilotMetricsReport; +pub use retrieval::RetrievalMetricsReport; From d763949edfb33fdff7598e6040cfa56ad615f3c2 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 15 Apr 2026 10:05:14 +0800 Subject: [PATCH 5/6] feat(python): add recursive option to from_dir method Add recursive parameter to PyIndexContext.from_dir with default value false. When recursive=True, uses IndexContext::from_dir_recursive instead of from_dir. feat(rust): implement recursive directory scanning for IndexContext Add IndexContext::from_dir_recursive method that scans directories recursively. Refactor from_dir to use internal scan_dir helper function. Update supported extensions to only include .md and .pdf files. 
docs: add directory indexing example with CLI interface Add new example showing how to recursively index documents in a directory with command line arguments for recursive/non-recursive modes. --- python/src/lib.rs | 16 +++- rust/examples/index_directory.rs | 123 +++++++++++++++++++++++++++++++ rust/src/client/index_context.rs | 85 ++++++++++++++++++--- 3 files changed, 209 insertions(+), 15 deletions(-) create mode 100644 rust/examples/index_directory.rs diff --git a/python/src/lib.rs b/python/src/lib.rs index a2fe5bb3..0ed8dc7d 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -219,11 +219,19 @@ impl PyIndexContext { } /// Create an IndexContext from all supported files in a directory. + /// + /// Args: + /// path: Directory path to scan. + /// recursive: If True, scan subdirectories recursively. Default: False. #[staticmethod] - fn from_dir(path: String) -> Self { - Self { - inner: IndexContext::from_dir(&path), - } + #[pyo3(signature = (path, recursive=false))] + fn from_dir(path: String, recursive: bool) -> Self { + let inner = if recursive { + IndexContext::from_dir_recursive(&path) + } else { + IndexContext::from_dir(&path) + }; + Self { inner } } /// Create an IndexContext from text content. diff --git a/rust/examples/index_directory.rs b/rust/examples/index_directory.rs new file mode 100644 index 00000000..289cb8a2 --- /dev/null +++ b/rust/examples/index_directory.rs @@ -0,0 +1,123 @@ +// Copyright (c) 2026 vectorless developers +// SPDX-License-Identifier: Apache-2.0 + +//! Directory indexing example — recursively index all documents in a directory. +//! +//! ```bash +//! # Using environment variables for LLM config: +//! LLM_API_KEY=sk-xxx LLM_MODEL=google/gemini-3-flash-preview \ +//! LLM_ENDPOINT=http://localhost:4000/api/v1 \ +//! cargo run --example index_directory -- /path/to/docs +//! +//! # With recursive flag (default): +//! cargo run --example index_directory -- /path/to/docs --recursive +//! +//! # Non-recursive (top-level only): +//! 
cargo run --example index_directory -- /path/to/docs --no-recursive
+//! ```
+
+use vectorless::{EngineBuilder, IndexContext};
+
+#[tokio::main]
+async fn main() -> vectorless::Result<()> {
+    tracing_subscriber::fmt::init();
+
+    // Parse CLI arguments
+    let args: Vec<String> = std::env::args().collect();
+    let dir = args
+        .get(1)
+        .map(|s| s.as_str())
+        .unwrap_or("./samples");
+    let recursive = !args.iter().any(|a| a == "--no-recursive");
+
+    // Build engine
+    let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string());
+    let model =
+        std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string());
+    let endpoint = std::env::var("LLM_ENDPOINT")
+        .unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string());
+
+    let engine = EngineBuilder::new()
+        .with_workspace("./workspace_directory_example")
+        .with_key(&api_key)
+        .with_model(&model)
+        .with_endpoint(&endpoint)
+        .build()
+        .await
+        .map_err(|e| vectorless::Error::Config(e.to_string()))?;
+
+    // Index directory
+    let ctx = if recursive {
+        println!("Recursively indexing: {}", dir);
+        IndexContext::from_dir_recursive(dir)
+    } else {
+        println!("Indexing top-level files in: {}", dir);
+        IndexContext::from_dir(dir)
+    };
+
+    if ctx.is_empty() {
+        println!("No supported files found in: {}", dir);
+        return Ok(());
+    }
+
+    println!("Found {} file(s) to index", ctx.len());
+
+    let result = engine.index(ctx).await?;
+
+    println!("\nIndexed {} document(s):", result.items.len());
+    for item in &result.items {
+        println!(" {} ({})", item.name, item.doc_id);
+        if let Some(metrics) = &item.metrics {
+            println!(
+                " nodes: {}, time: {}ms",
+                metrics.nodes_processed,
+                metrics.total_time_ms()
+            );
+        }
+    }
+
+    if result.has_failures() {
+        println!("\nFailed:");
+        for f in &result.failed {
+            println!(" {} — {}", f.source, f.error);
+        }
+    }
+
+    // Query across all indexed documents
+    let query = "What is this about?";
+    println!("\nQuerying: \"{query}\"");
+
+    let answer = 
engine
+        .query(vectorless::QueryContext::new(query))
+        .await?;
+
+    for item in &answer.items {
+        println!(" [{} score={:.2}]", item.doc_id, item.score);
+        let preview: String = item.content.chars().take(200).collect();
+        println!(" {preview}");
+        if item.content.len() > 200 {
+            println!(" ...");
+        }
+    }
+
+    // Metrics report
+    let report = engine.metrics_report();
+    println!("\nMetrics:");
+    println!(
+        " LLM: {} calls, {} tokens, ${:.4}",
+        report.llm.total_calls,
+        report.llm.total_tokens,
+        report.llm.estimated_cost_usd,
+    );
+    println!(
+        " Retrieval: {} queries, avg score {:.2}",
+        report.retrieval.total_queries, report.retrieval.avg_path_score,
+    );
+
+    // Cleanup
+    for doc in engine.list().await? {
+        engine.remove(&doc.id).await?;
+    }
+
+    Ok(())
+}
diff --git a/rust/src/client/index_context.rs b/rust/src/client/index_context.rs
index aca0270e..989252b8 100644
--- a/rust/src/client/index_context.rs
+++ b/rust/src/client/index_context.rs
@@ -29,7 +29,11 @@
 //! ```rust,no_run
 //! use vectorless::client::IndexContext;
 //!
+//! // Non-recursive (top-level only)
 //! let ctx = IndexContext::from_dir("./documents");
+//!
+//! // Recursive (includes subdirectories)
+//! let ctx = IndexContext::from_dir_recursive("./documents");
 //! ```
 
 use std::path::PathBuf;
@@ -149,27 +153,58 @@ impl IndexContext {
     /// Create from a directory path.
     ///
     /// Indexes all supported files in the directory (non-recursive).
-    /// Supported extensions: `.md`, `.pdf`, `.txt`.
+    /// Supported extensions: `.md`, `.pdf`.
     pub fn from_dir(dir: impl Into<PathBuf>) -> Self {
+        Self::scan_dir(dir, false)
+    }
+
+    /// Create from a directory path with recursive scanning.
+    ///
+    /// Recursively indexes all supported files in the directory and its
+    /// subdirectories. Supported extensions: `.md`, `.pdf`.
+    pub fn from_dir_recursive(dir: impl Into<PathBuf>) -> Self {
+        Self::scan_dir(dir, true)
+    }
+
+    /// Internal: scan a directory for supported document files.
+    fn scan_dir(dir: impl Into<PathBuf>, recursive: bool) -> Self {
         let dir = dir.into();
-        let supported_extensions = ["md", "markdown", "pdf", "txt"];
+        let supported_extensions = ["md", "pdf"];
         let mut sources = Vec::new();
-        if let Ok(entries) = std::fs::read_dir(&dir) {
+        Self::collect_files(&dir, &supported_extensions, recursive, &mut sources);
+
+        Self {
+            sources,
+            name: None,
+            options: IndexOptions::default(),
+        }
+    }
+
+    /// Recursively or non-recursively collect supported files.
+    fn collect_files(
+        dir: &std::path::Path,
+        extensions: &[&str],
+        recursive: bool,
+        sources: &mut Vec<IndexSource>,
+    ) {
+        if let Ok(entries) = std::fs::read_dir(dir) {
+            let mut subdirs = Vec::new();
             for entry in entries.flatten() {
                 let path = entry.path();
-                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
-                    if supported_extensions.contains(&ext.to_lowercase().as_str()) {
+                if path.is_dir() {
+                    if recursive {
+                        subdirs.push(path);
+                    }
+                } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
+                    if extensions.contains(&ext.to_lowercase().as_str()) {
                         sources.push(IndexSource::Path(path));
                     }
                 }
             }
-        }
-
-        Self {
-            sources,
-            name: None,
-            options: IndexOptions::default(),
+            for subdir in subdirs {
+                Self::collect_files(&subdir, extensions, recursive, sources);
+            }
         }
     }
@@ -316,4 +351,32 @@ mod tests {
         let ctx = IndexContext::from(PathBuf::from("./test.md"));
         assert_eq!(ctx.len(), 1);
     }
+
+    #[test]
+    fn test_from_dir_recursive() {
+        // Create a temp directory structure:
+        //   tmp/
+        //     a.md
+        //     sub/
+        //       b.md
+        //       deep/
+        //         c.pdf
+        let tmp = std::env::temp_dir().join("vectorless_test_dir_recursive");
+        let _ = std::fs::remove_dir_all(&tmp);
+        std::fs::create_dir_all(tmp.join("sub/deep")).unwrap();
+        std::fs::write(tmp.join("a.md"), "# A").unwrap();
+        std::fs::write(tmp.join("sub/b.md"), "# B").unwrap();
+        std::fs::write(tmp.join("sub/deep/c.pdf"), b"%PDF").unwrap();
+        std::fs::write(tmp.join("sub/deep/ignore.dat"), b"xxx").unwrap();
+
+        // Non-recursive: only top-level
+        
let ctx = IndexContext::from_dir(&tmp); + assert_eq!(ctx.len(), 1); // only a.md + + // Recursive: all levels + let ctx = IndexContext::from_dir_recursive(&tmp); + assert_eq!(ctx.len(), 3); // a.md, b.md, c.pdf + + let _ = std::fs::remove_dir_all(&tmp); + } } From 782c3783b25e014ecc14877e72cfce2013bd8e80 Mon Sep 17 00:00:00 2001 From: zTgx <747674262@qq.com> Date: Wed, 15 Apr 2026 10:11:53 +0800 Subject: [PATCH 6/6] feat: add directory indexing example and enhance IndexContext - Add new example script examples/index_directory/main.py that demonstrates recursive directory indexing functionality - The example supports command-line arguments for directory path and recursive/non-recursive scanning options - Add environment variable support for LLM configuration - Implement document listing, querying, and metrics reporting - Add __len__ and is_empty methods to PyIndexContext for better Python integration - Update IndexContext repr to show number of sources --- examples/index_directory/main.py | 100 +++++++++++++++++++++++++++++++ python/src/lib.rs | 12 +++- 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 examples/index_directory/main.py diff --git a/examples/index_directory/main.py b/examples/index_directory/main.py new file mode 100644 index 00000000..f2446215 --- /dev/null +++ b/examples/index_directory/main.py @@ -0,0 +1,100 @@ +""" +Directory indexing example — recursively index all documents in a directory. 
+
+Usage:
+    python examples/index_directory/main.py /path/to/docs
+    python examples/index_directory/main.py /path/to/docs --no-recursive
+
+Environment variables:
+    LLM_API_KEY — Your LLM API key (required)
+    LLM_MODEL — Model name (default: google/gemini-3-flash-preview)
+    LLM_ENDPOINT — API endpoint (default: http://localhost:4000/api/v1)
+"""
+
+import argparse
+import asyncio
+import os
+
+from vectorless import Engine, IndexContext, QueryContext
+
+
+async def main():
+    parser = argparse.ArgumentParser(description="Index a directory of documents")
+    parser.add_argument("directory", help="Directory path to index")
+    parser.add_argument(
+        "--no-recursive",
+        action="store_true",
+        help="Only scan top-level files (default: recursive)",
+    )
+    args = parser.parse_args()
+
+    # Build engine
+    api_key = os.environ.get("LLM_API_KEY", "sk-or-v1-...")
+    model = os.environ.get("LLM_MODEL", "google/gemini-3-flash-preview")
+    endpoint = os.environ.get("LLM_ENDPOINT", "http://localhost:4000/api/v1")
+
+    engine = Engine(
+        workspace="./workspace_directory_example",
+        api_key=api_key,
+        model=model,
+        endpoint=endpoint,
+    )
+
+    recursive = not args.no_recursive
+
+    # Index directory
+    ctx = IndexContext.from_dir(args.directory, recursive=recursive)
+
+    if ctx.is_empty():
+        print(f"No supported files found in: {args.directory}")
+        return
+
+    print(f"{'Recursively scanning' if recursive else 'Scanning top-level files in'}: {args.directory}")
+    print(f"Found {len(ctx)} file(s) to index")
+
+    result = await engine.index(ctx)
+
+    print(f"\nIndexed {len(result.items)} document(s):")
+    for item in result.items:
+        print(f" {item.name} ({item.doc_id})")
+        if item.metrics:
+            print(f" nodes: {item.metrics.nodes_processed}, time: {item.metrics.total_time_ms}ms")
+
+    if result.has_failures():
+        print("\nFailed:")
+        for f in result.failed:
+            print(f" {f.source} — {f.error}")
+
+    # Query across all indexed documents
+    query = "What is this about?"
+ print(f'\nQuerying: "{query}"') + + answer = await engine.query(QueryContext(query)) + for item in answer.items: + print(f" [{item.doc_id} score={item.score:.2f}]") + preview = item.content[:200] + print(f" {preview}") + if len(item.content) > 200: + print(" ...") + + # Metrics report + report = engine.metrics_report() + print("\nMetrics:") + print( + f" LLM: {report.llm.total_calls} calls, " + f"{report.llm.total_tokens} tokens, " + f"${report.llm.estimated_cost_usd:.4f}" + ) + print( + f" Retrieval: {report.retrieval.total_queries} queries, " + f"avg score {report.retrieval.avg_path_score:.2f}" + ) + + # Cleanup + docs = await engine.list() + for doc in docs: + await engine.remove(doc.id) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/src/lib.rs b/python/src/lib.rs index 0ed8dc7d..640b1024 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -283,8 +283,18 @@ impl PyIndexContext { Ok(Self { inner: ctx }) } + /// Number of document sources. + fn __len__(&self) -> usize { + self.inner.len() + } + + /// Whether no sources are present. + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + fn __repr__(&self) -> String { - "IndexContext(...)".to_string() + format!("IndexContext(sources={})", self.inner.len()) } }