From b472fcde960aab4d00a9f028ca132080d68498af Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Mon, 2 Mar 2026 10:28:36 -0800 Subject: [PATCH 1/7] Tiny update to scorer example in README.md to work out of the box + provide some minimal context Updated reference and transcript file names in README. --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 27aaf41..b042b1e 100644 --- a/README.md +++ b/README.md @@ -177,14 +177,14 @@ can score your speech processor by running: simulstream_score_latency --scorer stream_laal \ --eval-config config/speech_processor.yaml \ --log-file metrics.jsonl \ - --reference REFERENCE_FILE.txt \ + --reference TEXT_FILE.tgt \ --audio-definition YAML_AUDIO_REFERENCES_DEFINITION.yaml simulstream_score_quality --scorer comet \ --eval-config config/speech_processor.yaml \ --log-file metrics.jsonl \ - --references REFERENCES_FILE.txt \ - --transcripts TRANSCRIPTS_FILE.txt + --references TEXT_FILE.tgt \ + --transcripts TEXT_FILE.src simulstream_stats --eval-config config/speech_processor.yaml \ --log-file metrics.jsonl @@ -198,7 +198,9 @@ the selected metric (``--scorer``). Similarly, ``simulstream_score_quality`` evaluated the quality of the generated outputs against one (or more) reference (and transcript, only for metrics -requiring them) file(s). +requiring them) file(s). For metrics where an audio definition YAML is unnecessary, but a transcript +is necessary, both the reference and transcript files must have corresponding file stems (e.g., +`TEXT_FILE`). Lastly, ``simulstream_stats`` computes statistics like the computational cost and flickering ratio. 
From 55aae5cc9e25619b1c26a9d6efd695b1002b7aef Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Mon, 2 Mar 2026 10:32:07 -0800 Subject: [PATCH 2/7] Tiny modification to score_quality.py documentation to clarify usage Updated reference and transcript file names in the usage example and adjusted help text for clarity. --- simulstream/metrics/score_quality.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py index 0a621d3..7208cf1 100644 --- a/simulstream/metrics/score_quality.py +++ b/simulstream/metrics/score_quality.py @@ -122,8 +122,8 @@ def cli_main(): $ python -m simulstream.metrics.score_quality \\ --eval-config config/speech-processor.yaml \\ --log-file metrics.jsonl \\ - --references ref.en \\ - --transcripts src.it \\ + --references 1.en \\ + --transcripts 1.it \\ --scorer sacrebleu """ LOGGER.info(f"Simulstream version: {simulstream.__version__}") @@ -140,14 +140,17 @@ def cli_main(): "specified, this should be a single file containing all the lines of the audios in " "the reference, which should be of the same length of the audio definition. " "Otherwise, this should be a list of files, where each contains the lines " - "corresponding to an audio file.") + "corresponding to an audio file. In the case of being a list of files, the file " + "stem must match a corresponding transcript for an audio file (if applicable " + "to the quality metric).") parser.add_argument( "--transcripts", nargs="+", type=str, help="Path to the textual files containing reference transcripts. If `--audio-definition` " "is specified, this should be a single file containing all the lines of the audios " "in the reference, which should be of the same length of the audio definition. " "Otherwise, this should be a list of files, where each contains the lines " - "corresponding to an audio file.") + "corresponding to an audio file. 
In the case of being a list of files, the file " + "stem must match a corresponding reference for an audio file.") parser.add_argument( "--audio-definition", "-a", type=str, default=None, help="Path to the yaml file containing the segment-level audio information.") From 4e2c1d369a98b05ec3a8bbdd3c9d54f7d4af78a9 Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:19:53 -0800 Subject: [PATCH 3/7] Update README.md with suggested change Co-authored-by: Marco Gaido --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b042b1e..af7e098 100644 --- a/README.md +++ b/README.md @@ -198,7 +198,7 @@ the selected metric (``--scorer``). Similarly, ``simulstream_score_quality`` evaluated the quality of the generated outputs against one (or more) reference (and transcript, only for metrics -requiring them) file(s). For metrics where an audio definition YAML is unnecessary, but a transcript +requiring them) file(s). If an audio definition YAML is not provided, but a transcript is necessary, both the reference and transcript files must have corresponding file stems (e.g., `TEXT_FILE`). From d210b03eb3b43e2f0927b52d80818a31bccd9a2e Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:39:05 -0800 Subject: [PATCH 4/7] Update to README.md with improved clarification for quality scorer usage Updated references in README for simulstream commands and added clarification on audio definitions. 
--- README.md | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index af7e098..d1604e7 100644 --- a/README.md +++ b/README.md @@ -177,14 +177,15 @@ can score your speech processor by running: simulstream_score_latency --scorer stream_laal \ --eval-config config/speech_processor.yaml \ --log-file metrics.jsonl \ - --reference TEXT_FILE.tgt \ + --reference REFERENCES_FILE.tgt \ --audio-definition YAML_AUDIO_REFERENCES_DEFINITION.yaml simulstream_score_quality --scorer comet \ --eval-config config/speech_processor.yaml \ --log-file metrics.jsonl \ - --references TEXT_FILE.tgt \ - --transcripts TEXT_FILE.src + --references REFERENCES_FILE.tgt \ + --transcripts TRANSCRIPTS_FILE.src \ + --audio-definition YAML_AUDIO_REFERENCES_DEFINITION.yaml simulstream_stats --eval-config config/speech_processor.yaml \ --log-file metrics.jsonl @@ -198,9 +199,20 @@ the selected metric (``--scorer``). Similarly, ``simulstream_score_quality`` evaluated the quality of the generated outputs against one (or more) reference (and transcript, only for metrics -requiring them) file(s). If an audio definition YAML is not provided, but a transcript -is necessary, both the reference and transcript files must have corresponding file stems (e.g., -`TEXT_FILE`). +requiring them) file(s). Here, the `YAML_AUDIO_REFERENCES_DEFINITION.yaml` has the same number of entries (sentence definitions +in terms of wav file origin, offset and duration) as `REFERENCES_FILE.tgt` and `TRANSCRIPTS_FILE.src`. + +As an alternative, `simulstream_score_quality` can be run without the `--audio-definition` specification, by using a list of +files as arguments of `--references` and `--transcripts`. In this case, the name of the files (trimmed of the extension) +**must be the same** as that of the audio files used (i.e. the names present in `metrics.jsonl`). 
For instance: + +``` +simulstream_score_quality --scorer comet \ + --eval-config config/speech_processor.yaml \ + --log-file metrics.jsonl \ + --references AUDIO1.tgt AUDIO2.tgt AUDIO3.tgt \ + --transcripts AUDIO1.src AUDIO2.src AUDIO3.src +``` Lastly, ``simulstream_stats`` computes statistics like the computational cost and flickering ratio. From 100551cf2b1a7ef7404b511a2e0c4255961e60b9 Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:43:33 -0800 Subject: [PATCH 5/7] Modify quality scorer docs to clarify usage without audio definitions YAML as suggested Updated command line arguments for the score_quality script to use new reference and transcript file names, and added an audio definition option. --- simulstream/metrics/score_quality.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py index 7208cf1..fdecb62 100644 --- a/simulstream/metrics/score_quality.py +++ b/simulstream/metrics/score_quality.py @@ -122,8 +122,20 @@ def cli_main(): $ python -m simulstream.metrics.score_quality \\ --eval-config config/speech-processor.yaml \\ --log-file metrics.jsonl \\ - --references 1.en \\ - --transcripts 1.it \\ + --references ref.en \\ + --transcripts src.it \\ + --audio-definition audio_def.yaml \\ + --scorer sacrebleu + + Otherwise, the script can be invoked without specifying the `--audio-definition`, but in this case + the name of the refererence and transcript files (trimmed of the extension) must be the same + of the audio files used (i.e. 
the names present in `metrics.jsonl`), e.g.: + + $ python -m simulstream.metrics.score_quality \\ + --eval-config config/speech-processor.yaml \\ + --log-file metrics.jsonl \\ + --references 1.en 2.en \\ + --transcripts 1.it 2.it \\ --scorer sacrebleu """ LOGGER.info(f"Simulstream version: {simulstream.__version__}") From 788969e056359b383285048169de13ddf6d54ad2 Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:47:36 -0800 Subject: [PATCH 6/7] Modified scorer docs to respect linter column/width limit --- simulstream/metrics/score_quality.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py index fdecb62..8f9d475 100644 --- a/simulstream/metrics/score_quality.py +++ b/simulstream/metrics/score_quality.py @@ -127,9 +127,10 @@ def cli_main(): --audio-definition audio_def.yaml \\ --scorer sacrebleu - Otherwise, the script can be invoked without specifying the `--audio-definition`, but in this case - the name of the refererence and transcript files (trimmed of the extension) must be the same - of the audio files used (i.e. the names present in `metrics.jsonl`), e.g.: + Otherwise, the script can be invoked without specifying the `--audio-definition`, + but in this case the name of the refererence and transcript files (trimmed of + the extension) must be the same of the audio files used (i.e. 
the names present + in `metrics.jsonl`), e.g.: $ python -m simulstream.metrics.score_quality \\ --eval-config config/speech-processor.yaml \\ From ad68791de974a8b73c543b54915fe608f2535056 Mon Sep 17 00:00:00 2001 From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:53:38 -0800 Subject: [PATCH 7/7] Removed trailing white space in quality scorer --- simulstream/metrics/score_quality.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py index 8f9d475..6479f98 100644 --- a/simulstream/metrics/score_quality.py +++ b/simulstream/metrics/score_quality.py @@ -127,9 +127,9 @@ def cli_main(): --audio-definition audio_def.yaml \\ --scorer sacrebleu - Otherwise, the script can be invoked without specifying the `--audio-definition`, - but in this case the name of the refererence and transcript files (trimmed of - the extension) must be the same of the audio files used (i.e. the names present + Otherwise, the script can be invoked without specifying the `--audio-definition`, + but in this case the name of the reference and transcript files (trimmed of + the extension) must match that of the audio files used (i.e. the names present in `metrics.jsonl`), e.g.: $ python -m simulstream.metrics.score_quality \\