diff --git a/retrieval-bench/src/retrieval_bench/cli/evaluate.py b/retrieval-bench/src/retrieval_bench/cli/evaluate.py
index d3cab7d76..7c52db03a 100644
--- a/retrieval-bench/src/retrieval_bench/cli/evaluate.py
+++ b/retrieval-bench/src/retrieval_bench/cli/evaluate.py
@@ -199,15 +199,15 @@ def _run_evaluation(
 
     # Cache-only mode: build corpus embeddings and exit without running queries.
     if cache_only:
-        pipeline.dataset_name = dataset_name
-        pipeline.index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts)
+        pipeline.index(
+            corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts, dataset_name=dataset_name
+        )
         print("Corpus embeddings cached. Exiting (--cache-only).")
         return
 
     # Evaluate
     print("\nRunning evaluation...")
     try:
-        pipeline.dataset_name = dataset_name
         trace_run_name_eff = trace_run_name or default_trace_run_name(pipeline)
         results = evaluate_retrieval(
             pipeline=pipeline,
diff --git a/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py b/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py
index 05cc43505..80f8ad201 100644
--- a/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py
+++ b/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py
@@ -91,7 +91,7 @@ def _filtered_run_for_query(qid: str, run_q: Any) -> Any:
         return out
 
     # Dataset context (for trace directory layout).
-    dataset_name_eff = dataset_name or getattr(pipeline, "dataset_name", None) or "unknown_dataset"
+    dataset_name_eff = dataset_name or "unknown_dataset"
 
     # Trace run name: always enabled; default is __.
     trace_run_name_eff = trace_run_name or default_trace_run_name(pipeline)
diff --git a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py
index b923cfdcd..a9a76afce 100644
--- a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py
+++ b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py
@@ -253,6 +253,7 @@ def __init__(
         self.retriever_top_k = int(retriever_top_k)
         self.num_concurrent = max(1, int(num_concurrent))
         self._backend_kwargs = dict(backend_kwargs)
+        self._dataset_name = None
 
         # Resolve os.environ/... convention for base_url.
         if base_url and str(base_url).strip().startswith("os.environ/"):
@@ -288,6 +289,14 @@ def __init__(
             print("Error: CUDA is not available. This pipeline requires a GPU.")
             sys.exit(1)
 
+    @property
+    def dataset_name(self):
+        return self._dataset_name
+
+    @dataset_name.setter
+    def dataset_name(self, value: str):
+        self._dataset_name = value
+
     # -----------------------------------------------------------------------
     # Async query loop
     # -----------------------------------------------------------------------
@@ -525,10 +534,13 @@ async def _process_query(q_idx: int, qid: str, query_text: Any) -> None:
     # -----------------------------------------------------------------------
     # Main entry point
     # -----------------------------------------------------------------------
 
-    def index(self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str]) -> None:
+    def index(
+        self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str], dataset_name: str = None
+    ) -> None:
         super().index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts)
-        dataset_name = self.dataset_name
-        task_key = infer_bright_task_key(dataset_name)
+        if dataset_name is not None:
+            self.dataset_name = dataset_name
+        task_key = infer_bright_task_key(self.dataset_name)
 
         corpus = [{"image": img, "markdown": md} for img, md in zip(corpus_images, corpus_texts)]
diff --git a/retrieval-bench/src/retrieval_bench/pipelines/dense.py b/retrieval-bench/src/retrieval_bench/pipelines/dense.py
index 9af791813..102a809f7 100644
--- a/retrieval-bench/src/retrieval_bench/pipelines/dense.py
+++ b/retrieval-bench/src/retrieval_bench/pipelines/dense.py
@@ -51,10 +51,22 @@ def __init__(self, *, backend: str, top_k: int = 100, **kwargs: Any) -> None:
             print("Error: CUDA is not available. This pipeline requires a GPU.")
             sys.exit(1)
 
-    def index(self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str]) -> None:
+    @property
+    def dataset_name(self):
+        # __init__ never sets _dataset_name here; default to None until index() provides one.
+        return getattr(self, "_dataset_name", None)
+
+    @dataset_name.setter
+    def dataset_name(self, value: str):
+        self._dataset_name = value
+
+    def index(
+        self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str], dataset_name: str = None
+    ) -> None:
         super().index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts)
 
-        dataset_name = self.dataset_name
-        task_key = infer_bright_task_key(dataset_name)
+        if dataset_name is not None:
+            self.dataset_name = dataset_name
+        task_key = infer_bright_task_key(self.dataset_name)
 
         corpus = [{"image": img, "markdown": md} for img, md in zip(corpus_images, corpus_texts)]