diff --git a/demos/embeddings/README.md b/demos/embeddings/README.md
index b74042f754..371b3c7fea 100644
--- a/demos/embeddings/README.md
+++ b/demos/embeddings/README.md
@@ -572,7 +572,9 @@ Results will be stored in `results` folder:
 Compare against local HuggingFace execution for reference:
 ```console
 mteb run -m thenlper/gte-small -t Banking77Classification --output_folder results
-```
+```
+
+> **Note**: To run a limited number of samples, add `--eval_splits test` to the command.
 
 # Usage of tokenize endpoint
 
diff --git a/demos/embeddings/ovms_mteb.py b/demos/embeddings/ovms_mteb.py
index ce1bb7af70..07c9158a54 100644
--- a/demos/embeddings/ovms_mteb.py
+++ b/demos/embeddings/ovms_mteb.py
@@ -32,6 +32,9 @@
                     dest='model_name')
 parser.add_argument('--dataset', default='Banking77Classification',
                     help='Dataset to benchmark. default: Banking77Classification',
                     dest='dataset')
+parser.add_argument('--eval_splits', nargs='*', default=None,
+                    help='Evaluation splits to use, e.g. --eval_splits test dev. If not set, all splits defined in the task are used.',
+                    dest='eval_splits')
 
 args = vars(parser.parse_args())
 
@@ -70,7 +73,8 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
         return np.array([e.embedding for e in embedding_response.data])
 
 model = OVMSModel(args['model_name'], args['service_url'] ,1)
-tasks = mteb.get_task(args['dataset'])
+tasks = mteb.get_task(args['dataset'],
+                      eval_splits=args['eval_splits'])
 evaluation = mteb.MTEB(tasks=[tasks])
 evaluation.run(model,verbosity=3,overwrite_results=True,output_folder='results')
 # For full leaderboard tests set run:
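
For reference, a minimal invocation sketch of the patched script. Only `--dataset` and `--eval_splits` appear verbatim in the diff; the `--model_name` and `--service_url` flag names are assumptions inferred from the `dest='model_name'` value and the `args['service_url']` key, and the URL and model values are illustrative only.

```console
# Hypothetical invocation; flags other than --dataset and --eval_splits
# are inferred from the diff, not confirmed by it.
python demos/embeddings/ovms_mteb.py \
    --service_url http://localhost:8000/v3 \
    --model_name thenlper/gte-small \
    --dataset Banking77Classification \
    --eval_splits test
```

Omitting `--eval_splits` leaves the new argument at its `default=None`, so `mteb.get_task` falls back to all splits defined in the task, matching the added help text.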