From 36fe7f0a90194d628add063b31b2509cb3c9cf94 Mon Sep 17 00:00:00 2001 From: Eduardo Pacheco Date: Tue, 30 Dec 2025 17:29:18 -0600 Subject: [PATCH] refactor: Allow callhome-english to be used as diarization dataset as well --- src/openbench/dataset/dataset_aliases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openbench/dataset/dataset_aliases.py b/src/openbench/dataset/dataset_aliases.py index d5b1400..c37af42 100644 --- a/src/openbench/dataset/dataset_aliases.py +++ b/src/openbench/dataset/dataset_aliases.py @@ -201,7 +201,7 @@ def register_dataset_aliases() -> None: DatasetConfig( dataset_id=os.getenv("CALLHOME_ENGLISH_DATASET_REPO_ID", "argmaxinc/callhome-english"), split="test" ), - supported_pipeline_types={PipelineType.TRANSCRIPTION, PipelineType.ORCHESTRATION}, + supported_pipeline_types={PipelineType.TRANSCRIPTION, PipelineType.ORCHESTRATION, PipelineType.DIARIZATION}, description=( "Callhome English dataset for transcription and orchestration evaluation. " "To use this dataset you need to buy the license for the audio files at https://catalog.ldc.upenn.edu/LDC97S42 and the license for the transcript files at https://catalog.ldc.upenn.edu/LDC97T14"