From b6fc8e418ead4ff25f964bf61daef3b7d3331717 Mon Sep 17 00:00:00 2001 From: Nicholas Pipitone Date: Thu, 16 Oct 2025 00:12:24 +0000 Subject: [PATCH] clean up types, by default don't include relevant docs --- README.md | 9 +++--- evals/common.py | 70 ++++++++++++++++++----------------------- evals/run_embeddings.py | 3 +- evals/run_ndcg.py | 4 +-- evals/run_pipeline.py | 3 +- evals/run_recall.py | 4 +-- evals/run_rerankers.py | 4 +-- evals/types.py | 37 ++++++++++++++++++++-- 8 files changed, 79 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 4e1f05b..3838af2 100644 --- a/README.md +++ b/README.md @@ -214,11 +214,10 @@ data/datasets/ │ ├── documents.jsonl # Processed documents │ ├── qrels.jsonl # Relevance judgments │ └── {retrieval_method}/ -│ └── {merged|unmerged}/ -│ ├── ze_results.jsonl # Initial retrieval results -│ ├── embeddings_cache.db # Embedding cache -│ └── {reranker}/ -│ └── ze_scores.jsonl # Reranked results +│ ├── ze_results.jsonl # Initial retrieval results +│ ├── embeddings_cache.db # Embedding cache +│ └── {reranker}/ +│ └── ze_scores.jsonl # Reranked results ``` ## Performance Tips diff --git a/evals/common.py b/evals/common.py index 29b9ceb..e5256ea 100644 --- a/evals/common.py +++ b/evals/common.py @@ -1,31 +1,9 @@ -from typing import Any, Literal +from typing import Any from pydantic import AliasChoices, BaseModel, Field, computed_field from evals.utils import ROOT -RetrievalMethod = Literal[ - "qwen3_4b", "qwen3_0.6b", "voyageai", "openai_small", "bm25", "hybrid" -] -MergeStatus = Literal["merged", "unmerged"] -RerankerName = Literal[ - "cohere", - "salesforce", - "zeroentropy-large", - "zeroentropy-small", - "zeroentropy-small-modal", - "zeroentropy-large-modal", - "zeroentropy-baseten", - "mixedbread", - "jina", - "qwen", - "openai-large-embedding", - "gpt-4o-mini", - "gpt-4.1-mini", - "gpt-5-mini", - "gpt-5-nano", -] - class ZEDataset(BaseModel): id: str @@ -57,38 +35,52 @@ def documents_path(self) -> str: def
qrels_path(self) -> str: return self.file_path("qrels.jsonl") + def retrieval_method_path( + self, + retrieval_method: str, + include_relevant_docs: bool, + path: str, + ) -> str: + if include_relevant_docs: + retrieval_method = f"{retrieval_method}+include_relevant_docs" + return self.file_path(f"{retrieval_method}/{path}") + def ze_results_path( - self, retrieval_method: RetrievalMethod, include_relevant_docs: bool + self, + retrieval_method: str, + include_relevant_docs: bool, ) -> str: - merge_status: MergeStatus = "merged" if include_relevant_docs else "unmerged" - return self.file_path(f"{retrieval_method}/{merge_status}/ze_results.jsonl") + return self.retrieval_method_path( + retrieval_method, include_relevant_docs, "ze_results.jsonl" + ) def embeddings_cache_path( - self, retrieval_method: RetrievalMethod, include_relevant_docs: bool + self, + retrieval_method: str, + include_relevant_docs: bool, ) -> str: - merge_status: MergeStatus = "merged" if include_relevant_docs else "unmerged" - return self.file_path(f"{retrieval_method}/{merge_status}/embeddings_cache.db") + return self.retrieval_method_path( + retrieval_method, include_relevant_docs, "embeddings_cache.db" + ) def reranker_cache_path( self, - retrieval_method: RetrievalMethod, + retrieval_method: str, include_relevant_docs: bool, - reranker: RerankerName, + reranker: str, ) -> str: - merge_status: MergeStatus = "merged" if include_relevant_docs else "unmerged" - return self.file_path( - f"{retrieval_method}/{merge_status}/{reranker}/reranker_cache.db" + return self.retrieval_method_path( + retrieval_method, include_relevant_docs, f"{reranker}/reranker_cache.db" ) def ze_scores_path( self, - retrieval_method: RetrievalMethod, + retrieval_method: str, include_relevant_docs: bool, - reranker: RerankerName, + reranker: str, ) -> str: - merge_status: MergeStatus = "merged" if include_relevant_docs else "unmerged" - return self.file_path( - f"{retrieval_method}/{merge_status}/{reranker}/ze_scores.jsonl" 
+ return self.retrieval_method_path( + retrieval_method, include_relevant_docs, f"{reranker}/ze_scores.jsonl" ) diff --git a/evals/run_embeddings.py b/evals/run_embeddings.py index de60977..b2c3edb 100644 --- a/evals/run_embeddings.py +++ b/evals/run_embeddings.py @@ -15,12 +15,13 @@ ai_embedding, tiktoken_truncate_by_num_tokens, ) -from evals.common import Document, QRel, Query, RetrievalMethod, ZEDataset, ZEResults +from evals.common import Document, QRel, Query, ZEDataset, ZEResults from evals.ingestors.common import BaseIngestor from evals.types import ( DEFAULT_INCLUDE_RELEVANT_DOCS, DEFAULT_INGESTORS, DEFAULT_RETRIEVAL_METHOD, + RetrievalMethod, ) from evals.utils import ROOT diff --git a/evals/run_ndcg.py b/evals/run_ndcg.py index d053c3e..b066055 100644 --- a/evals/run_ndcg.py +++ b/evals/run_ndcg.py @@ -4,8 +4,6 @@ from evals.common import ( QueryScores, - RerankerName, - RetrievalMethod, ZEDataset, ZEResults, ) @@ -15,6 +13,8 @@ DEFAULT_INGESTORS, DEFAULT_RERANKERS, DEFAULT_RETRIEVAL_METHOD, + RerankerName, + RetrievalMethod, ) from evals.utils import argsort, avg diff --git a/evals/run_pipeline.py b/evals/run_pipeline.py index 3aec849..774b4aa 100644 --- a/evals/run_pipeline.py +++ b/evals/run_pipeline.py @@ -1,7 +1,6 @@ import asyncio from typing import Literal -from evals.common import RerankerName, RetrievalMethod from evals.ingestors.common import BaseIngestor from evals.run_embeddings import run_embeddings from evals.run_ingestors import run_ingestors @@ -13,6 +12,8 @@ DEFAULT_MAX_QUERIES, DEFAULT_RERANKERS, DEFAULT_RETRIEVAL_METHOD, + RerankerName, + RetrievalMethod, ) INGESTORS: list[BaseIngestor] = DEFAULT_INGESTORS diff --git a/evals/run_recall.py b/evals/run_recall.py index 0d99d9a..0b77128 100644 --- a/evals/run_recall.py +++ b/evals/run_recall.py @@ -4,8 +4,6 @@ from evals.common import ( QRel, QueryScores, - RerankerName, - RetrievalMethod, ZEDataset, ZEResults, ) @@ -15,6 +13,8 @@ DEFAULT_INGESTORS, DEFAULT_RERANKERS, 
DEFAULT_RETRIEVAL_METHOD, + RerankerName, + RetrievalMethod, ) from evals.utils import argsort, avg diff --git a/evals/run_rerankers.py b/evals/run_rerankers.py index 2779291..5428435 100644 --- a/evals/run_rerankers.py +++ b/evals/run_rerankers.py @@ -18,8 +18,6 @@ from evals.common import ( DocumentScores, QueryScores, - RerankerName, - RetrievalMethod, ZEDataset, ZEResults, ) @@ -30,6 +28,8 @@ DEFAULT_INGESTORS, DEFAULT_RERANKERS, DEFAULT_RETRIEVAL_METHOD, + RerankerName, + RetrievalMethod, ) from evals.utils import flatten, read_num_lines_pbar diff --git a/evals/types.py b/evals/types.py index ef58143..59b1094 100644 --- a/evals/types.py +++ b/evals/types.py @@ -1,6 +1,7 @@ +from typing import Literal + from evals.ai import AIEmbeddingModel, AIModel, AIRerankModel from evals.ai_rerank import AIModelAsReranker -from evals.common import RerankerName, RetrievalMethod from evals.ingestors.bioasq import BioasqIngestor from evals.ingestors.common import BaseIngestor from evals.ingestors.cosqa import CosqaIngestor @@ -23,8 +24,36 @@ from evals.ingestors.quora_swedish import QuoraSwedishIngestor from evals.ingestors.stackoverflowqa import StackoverflowqaIngestor +RetrievalMethod = Literal[ + "qwen3_4b", + "qwen3_0.6b", + "voyageai", + "openai_small", + "bm25", + "hybrid", +] + +RerankerName = Literal[ + "cohere", + "salesforce", + "zeroentropy-large", + "zeroentropy-small", + "zeroentropy-small-modal", + "zeroentropy-large-modal", + "zeroentropy-baseten", + "mixedbread", + "jina", + "qwen", + "openai-large-embedding", + "gpt-4o-mini", + "gpt-4.1-mini", + "gpt-5-mini", + "gpt-5-nano", +] + ALL_RERANKERS: dict[ - RerankerName, AIRerankModel | AIEmbeddingModel | AIModelAsReranker + RerankerName, + AIRerankModel | AIEmbeddingModel | AIModelAsReranker, ] = { "cohere": AIRerankModel(company="cohere", model="rerank-v3.5"), "salesforce": AIRerankModel(company="together", model="Salesforce/Llama-Rank-V1"), @@ -246,10 +275,12 @@ ), ] ALL_INGESTORS: list[BaseIngestor] = 
MTEB_INGESTORS + NEW_INGESTORS + ORIGINAL_INGESTORS + +# Defaults DEFAULT_INGESTORS: list[BaseIngestor] = ORIGINAL_INGESTORS DEFAULT_MAX_QUERIES: int = 100 DEFAULT_RETRIEVAL_METHOD: RetrievalMethod = "openai_small" -DEFAULT_INCLUDE_RELEVANT_DOCS: bool = True +DEFAULT_INCLUDE_RELEVANT_DOCS: bool = False DEFAULT_RERANKERS: list[RerankerName] = [ # "cohere", # "gpt-4o-mini",