Conversation
Member
|
Cannot review till automated checks are cleared |
NirantK
requested changes
Sep 19, 2025
inference/evaluate_nanobeir.py
Outdated
Comment on lines
+101
to
+187
class LlamaServerEncoder:
    """Client for a llama.cpp ``--embedding`` server (the ``/embedding`` route).

    The server's JSON response shape varies across llama.cpp versions, so the
    parsing helpers accept several layouts and always reduce them to a flat
    list of floats.  The first successful response locks the embedding
    dimension; subsequent vectors are padded/truncated to that size so
    downstream similarity code always sees fixed-dimension vectors.

    NOTE(review): module-level names ``RETRY_BACKOFF``, ``RETRY_SHRINK``,
    ``MAX_CHARS``, ``time`` and ``_doc_text`` are assumed to be defined
    elsewhere in this file — confirm before moving this class.
    """

    def __init__(self, endpoint: str):
        # Imported lazily so the module is usable without `requests`
        # installed when this encoder is not selected.
        import requests
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry

        self.session = requests.Session()
        retry = Retry(total=3, backoff_factor=0.2, status_forcelist=(502, 503, 504))
        adapter = HTTPAdapter(max_retries=retry)
        # Mount on both schemes: the original only covered "http://",
        # which silently disabled retries for https endpoints.
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)
        self.endpoint = endpoint.rstrip("/")
        self._dim = None  # locked to the embedding size after first good response

    def _parse_embedding_json(self, js):
        """Extract the first numeric vector from an arbitrarily nested response.

        Handles dicts keyed by common field names ("embedding", "vector",
        "values", "data"), lists of dicts, and nested lists.  A bare scalar
        is wrapped in a one-element list.
        """
        def first_vector(obj):
            if isinstance(obj, dict):
                # Prefer well-known embedding field names.
                for key in ("embedding", "vector", "values", "data"):
                    if key in obj:
                        return first_vector(obj[key])
                # Otherwise descend into the first value, if any.
                if obj:
                    return first_vector(next(iter(obj.values())))
                return []
            if isinstance(obj, (list, tuple)):
                if not obj:
                    return []
                # Nested container (dict/list/tuple): recurse into the head.
                if isinstance(obj[0], (dict, list, tuple)):
                    return first_vector(obj[0])
                return obj
            return [obj]

        return first_vector(js)

    def _to_1d_numeric(self, obj):
        """Flatten *obj* (via ``_parse_embedding_json``) into a flat float list.

        Raises ``TypeError``/``ValueError`` (from ``float()``) if a leaf is
        not numeric.
        """
        def flatten(items):
            for item in items:
                if isinstance(item, (list, tuple)):
                    yield from flatten(item)
                elif isinstance(item, dict):
                    for key in ("vector", "embedding", "values", "data"):
                        if key in item:
                            yield from flatten(item[key])
                            break
                    else:
                        # No known key: flatten every value.
                        for value in item.values():
                            yield from flatten(value)
                else:
                    yield item

        return [float(x) for x in flatten(self._parse_embedding_json(obj))]

    def _normalize_vec(self, vec):
        """Coerce *vec* to a flat float list of the locked dimensionality.

        The first vector seen defines ``self._dim``; shorter vectors are
        zero-padded and longer ones truncated.  NOTE(review): padding /
        truncation silently masks server-side dimension mismatches —
        consider raising instead.
        """
        vec = self._to_1d_numeric(vec)
        if self._dim is None:
            self._dim = len(vec)
            print(f"[llama] locked embedding dim = {self._dim}", flush=True)
        if len(vec) < self._dim:
            vec = vec + [0.0] * (self._dim - len(vec))
        elif len(vec) > self._dim:
            vec = vec[: self._dim]
        return vec

    def _embed_one(self, text: str, timeout=60):
        """POST one text to the server, retrying with progressively shorter input.

        Each scheduled attempt sleeps ``backoff`` seconds, shrinks the text to
        ``MAX_CHARS * shrink`` characters, and retries on a 5xx; any 4xx
        raises immediately via ``raise_for_status()``.  If every scheduled
        attempt hit a 5xx, a final 512-character attempt is made and any
        error is propagated.
        """
        orig = text
        for backoff, shrink in zip(RETRY_BACKOFF, RETRY_SHRINK):
            if backoff:
                time.sleep(backoff)
            chunk = orig[: int(MAX_CHARS * shrink)]
            resp = self.session.post(self.endpoint, json={"content": chunk}, timeout=timeout)
            if resp.status_code >= 500:
                continue  # retryable server error; shrink and try again
            resp.raise_for_status()
            return self._normalize_vec(resp.json())
        # Last-resort attempt with a tiny payload (slicing clamps to len(orig)).
        resp = self.session.post(self.endpoint, json={"content": orig[:512]}, timeout=timeout)
        resp.raise_for_status()
        return self._normalize_vec(resp.json())

    def encode_queries(self, queries, batch_size=1, **kwargs):
        """Embed each query string one at a time (the server has no batch API)."""
        return [self._embed_one(q) for q in queries]

    def encode_corpus(self, corpus, batch_size=1, **kwargs):
        """Embed each corpus document; *corpus* may be a dict or a sequence."""
        docs_iter = corpus.values() if isinstance(corpus, dict) else corpus
        return [self._embed_one(_doc_text(d)) for d in docs_iter]
Member
There was a problem hiding this comment.
Why are we embedding inside this code here? Instead of the existing embedding code?
inference/nanobeir_config.yaml
Outdated
Comment on lines
+5
to
+7
| models: | ||
| baseline: | ||
| hf_id: sentence-transformers/all-MiniLM-L6-v2 |
Member
There was a problem hiding this comment.
We're ~8 weeks into the semester. You should at least know the embedding models we are working on. all-MiniLM-L6-v2 is not even discussed or mentioned once.
It's okay to copy code off the Internet but for heaven's sake, don't sleep-walk through the project
Member
|
I don't see any of the changes for which you've commented "Resolved" @vedjaw. Do you not know how to use Github? You need to push changes. And fix the linting while you're at it |
did the required changes (qwen/qwen3-embedding-0.6b)
did the required changes
fixed lint error
fixing lint
Member
|
The linter is still failing, not ready for review |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Pull Request: Add NanoBEIR Evaluator
Summary
evaluate_nanobeir.py) that can benchmark: Motivation
What’s in this PR
evaluate_nanobeir.py — STModel for Sentence-Transformers baselines; outputs/nanobeir_leaderboard.json; nanobeir_config.yaml. How it works
scifact).Results (sample)
all-MiniLM-L6-v2 baseline: Example table
Repro / How to Run
Output location
Config example
Design notes
Risks / limitations
Testing done
scifact baseline.