From 0092ed3e66a901c4f8152ee00195657fc7531a20 Mon Sep 17 00:00:00 2001 From: Gabriel Moreira Date: Thu, 19 Mar 2026 22:46:19 +0000 Subject: [PATCH 1/3] Adding dataset_name argument to pipelines index() method --- .../src/retrieval_bench/cli/evaluate.py | 9 +++++---- .../pipeline_evaluation/evaluator.py | 2 +- .../src/retrieval_bench/pipelines/agentic.py | 19 ++++++++++++++++--- .../src/retrieval_bench/pipelines/dense.py | 16 ++++++++++++++-- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/retrieval-bench/src/retrieval_bench/cli/evaluate.py b/retrieval-bench/src/retrieval_bench/cli/evaluate.py index d3cab7d76..68de330c1 100644 --- a/retrieval-bench/src/retrieval_bench/cli/evaluate.py +++ b/retrieval-bench/src/retrieval_bench/cli/evaluate.py @@ -199,15 +199,16 @@ def _run_evaluation( # Cache-only mode: build corpus embeddings and exit without running queries. if cache_only: - pipeline.dataset_name = dataset_name - pipeline.index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts) + pipeline.index(corpus_ids=corpus_ids, + corpus_images=corpus_images, + corpus_texts=corpus_texts, + dataset_name=dataset_name) print("Corpus embeddings cached. Exiting (--cache-only).") return # Evaluate print("\nRunning evaluation...") try: - pipeline.dataset_name = dataset_name trace_run_name_eff = trace_run_name or default_trace_run_name(pipeline) results = evaluate_retrieval( pipeline=pipeline, @@ -224,7 +225,7 @@ def _run_evaluation( language=language, query_ids_selector=query_ids_selector, excluded_ids_by_query=excluded_ids_by_query, - metrics=_METRICS, + metrics=_METRICS ) except Exception as e: print(f"\nError during evaluation: {e}\n") diff --git a/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py b/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py index 05cc43505..80f8ad201 100644 --- a/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py +++ b/retrieval-bench/src/retrieval_bench/pipeline_evaluation/evaluator.py @@ -91,7 +91,7 @@ def _filtered_run_for_query(qid: str, run_q: Any) -> Any: return out # Dataset context (for trace directory layout). - dataset_name_eff = dataset_name or getattr(pipeline, "dataset_name", None) or "unknown_dataset" + dataset_name_eff = dataset_name or "unknown_dataset" # Trace run name: always enabled; default is __. trace_run_name_eff = trace_run_name or default_trace_run_name(pipeline) diff --git a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py index b923cfdcd..f406589a3 100644 --- a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py +++ b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py @@ -253,6 +253,7 @@ def __init__( self.retriever_top_k = int(retriever_top_k) self.num_concurrent = max(1, int(num_concurrent)) self._backend_kwargs = dict(backend_kwargs) + self._dataset_name = None # Resolve os.environ/... convention for base_url. if base_url and str(base_url).strip().startswith("os.environ/"): @@ -288,6 +289,14 @@ def __init__( print("Error: CUDA is not available. This pipeline requires a GPU.") sys.exit(1) + @property + def dataset_name(self): + return self._dataset_name + + @dataset_name.setter + def dataset_name(self, value: str): + self._dataset_name = value + # ----------------------------------------------------------------------- # Async query loop # ----------------------------------------------------------------------- @@ -524,11 +533,15 @@ async def _process_query(q_idx: int, qid: str, query_text: Any) -> None: # ----------------------------------------------------------------------- # Main entry point # ----------------------------------------------------------------------- - - def index(self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str]) -> None: + + def index(self, corpus_ids: List[str], + corpus_images: List[Any], + corpus_texts: List[str], + dataset_name: str = None) -> None: super().index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts) - dataset_name = self.dataset_name + if dataset_name is not None: + self.dataset_name = dataset_name task_key = infer_bright_task_key(dataset_name) corpus = [{"image": img, "markdown": md} for img, md in zip(corpus_images, corpus_texts)] diff --git a/retrieval-bench/src/retrieval_bench/pipelines/dense.py b/retrieval-bench/src/retrieval_bench/pipelines/dense.py index 9af791813..1663f5d90 100644 --- a/retrieval-bench/src/retrieval_bench/pipelines/dense.py +++ b/retrieval-bench/src/retrieval_bench/pipelines/dense.py @@ -51,10 +51,22 @@ def __init__(self, *, backend: str, top_k: int = 100, **kwargs: Any) -> None: print("Error: CUDA is not available. This pipeline requires a GPU.") sys.exit(1) - def index(self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str]) -> None: + @property + def dataset_name(self): + return self._dataset_name + + @dataset_name.setter + def dataset_name(self, value: str): + self._dataset_name = value + + def index(self, corpus_ids: List[str], + corpus_images: List[Any], + corpus_texts: List[str], + dataset_name: str = None) -> None: super().index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts) - dataset_name = self.dataset_name + if dataset_name is not None: + self.dataset_name = dataset_name task_key = infer_bright_task_key(dataset_name) corpus = [{"image": img, "markdown": md} for img, md in zip(corpus_images, corpus_texts)] From 980e055a650881efa832fb5270c197ed08b4eb24 Mon Sep 17 00:00:00 2001 From: Gabriel Moreira Date: Thu, 19 Mar 2026 22:58:21 +0000 Subject: [PATCH 2/3] Fixing linting issues --- retrieval-bench/src/retrieval_bench/cli/evaluate.py | 9 ++++----- retrieval-bench/src/retrieval_bench/pipelines/agentic.py | 7 +++---- retrieval-bench/src/retrieval_bench/pipelines/dense.py | 7 +++---- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/retrieval-bench/src/retrieval_bench/cli/evaluate.py b/retrieval-bench/src/retrieval_bench/cli/evaluate.py index 68de330c1..7c52db03a 100644 --- a/retrieval-bench/src/retrieval_bench/cli/evaluate.py +++ b/retrieval-bench/src/retrieval_bench/cli/evaluate.py @@ -199,10 +199,9 @@ def _run_evaluation( # Cache-only mode: build corpus embeddings and exit without running queries. if cache_only: - pipeline.index(corpus_ids=corpus_ids, - corpus_images=corpus_images, - corpus_texts=corpus_texts, - dataset_name=dataset_name) + pipeline.index( + corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts, dataset_name=dataset_name + ) print("Corpus embeddings cached. Exiting (--cache-only).") return @@ -225,7 +224,7 @@ def _run_evaluation( language=language, query_ids_selector=query_ids_selector, excluded_ids_by_query=excluded_ids_by_query, - metrics=_METRICS + metrics=_METRICS, ) except Exception as e: print(f"\nError during evaluation: {e}\n") diff --git a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py index f406589a3..f7b6c0885 100644 --- a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py +++ b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py @@ -534,10 +534,9 @@ async def _process_query(q_idx: int, qid: str, query_text: Any) -> None: # Main entry point # ----------------------------------------------------------------------- - def index(self, corpus_ids: List[str], - corpus_images: List[Any], - corpus_texts: List[str], - dataset_name: str = None) -> None: + def index( + self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str], dataset_name: str = None + ) -> None: super().index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts) if dataset_name is not None: diff --git a/retrieval-bench/src/retrieval_bench/pipelines/dense.py b/retrieval-bench/src/retrieval_bench/pipelines/dense.py index 1663f5d90..102a809f7 100644 --- a/retrieval-bench/src/retrieval_bench/pipelines/dense.py +++ b/retrieval-bench/src/retrieval_bench/pipelines/dense.py @@ -59,10 +59,9 @@ def dataset_name(self): def dataset_name(self, value: str): self._dataset_name = value - def index(self, corpus_ids: List[str], - corpus_images: List[Any], - corpus_texts: List[str], - dataset_name: str = None) -> None: + def index( + self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str], dataset_name: str = None + ) -> None: super().index(corpus_ids=corpus_ids, corpus_images=corpus_images, corpus_texts=corpus_texts) if dataset_name is not None: From 725171edb4679bd3566b060745520217c178e1a6 Mon Sep 17 00:00:00 2001 From: Gabriel Moreira Date: Thu, 19 Mar 2026 23:13:50 +0000 Subject: [PATCH 3/3] Fixed lint error --- retrieval-bench/src/retrieval_bench/pipelines/agentic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py index f7b6c0885..a9a76afce 100644 --- a/retrieval-bench/src/retrieval_bench/pipelines/agentic.py +++ b/retrieval-bench/src/retrieval_bench/pipelines/agentic.py @@ -296,7 +296,7 @@ def dataset_name(self): @dataset_name.setter def dataset_name(self, value: str): self._dataset_name = value - + # ----------------------------------------------------------------------- # Async query loop # ----------------------------------------------------------------------- @@ -533,7 +533,7 @@ async def _process_query(q_idx: int, qid: str, query_text: Any) -> None: # ----------------------------------------------------------------------- # Main entry point # ----------------------------------------------------------------------- - + def index( self, corpus_ids: List[str], corpus_images: List[Any], corpus_texts: List[str], dataset_name: str = None ) -> None: