From b472fcde960aab4d00a9f028ca132080d68498af Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Mon, 2 Mar 2026 10:28:36 -0800
Subject: [PATCH 1/7] Tiny update to scorer example in README.md to work out of
 the box + provide some minimal context

Updated reference and transcript file names in README.
---
 README.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 27aaf41..b042b1e 100644
--- a/README.md
+++ b/README.md
@@ -177,14 +177,14 @@ can score your speech processor by running:
 simulstream_score_latency --scorer stream_laal \
     --eval-config config/speech_processor.yaml \
     --log-file metrics.jsonl \
-    --reference REFERENCE_FILE.txt \
+    --reference TEXT_FILE.tgt \
     --audio-definition YAML_AUDIO_REFERENCES_DEFINITION.yaml
 
 simulstream_score_quality --scorer comet \
     --eval-config config/speech_processor.yaml \
     --log-file metrics.jsonl \
-    --references REFERENCES_FILE.txt \
-    --transcripts TRANSCRIPTS_FILE.txt
+    --references TEXT_FILE.tgt \
+    --transcripts TEXT_FILE.src
 
 simulstream_stats --eval-config config/speech_processor.yaml \
     --log-file metrics.jsonl
@@ -198,7 +198,9 @@ the selected metric (``--scorer``).
 
 Similarly, ``simulstream_score_quality`` evaluated the quality
 of the generated outputs against one (or more) reference (and transcript, only for metrics
-requiring them) file(s).
+requiring them) file(s). For metrics where an audio definition YAML is unnecessary, but a transcript
+is necessary, both the reference and transcript files must have corresponding file stems (e.g.,
+`TEXT_FILE`).
 
 Lastly, ``simulstream_stats`` computes statistics like the computational cost and flickering ratio.
 

From 55aae5cc9e25619b1c26a9d6efd695b1002b7aef Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Mon, 2 Mar 2026 10:32:07 -0800
Subject: [PATCH 2/7] Tiny modification to score_quality.py documentation to
 clarify usage

Updated reference and transcript file names in the usage example and adjusted help text for clarity.
---
 simulstream/metrics/score_quality.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py
index 0a621d3..7208cf1 100644
--- a/simulstream/metrics/score_quality.py
+++ b/simulstream/metrics/score_quality.py
@@ -122,8 +122,8 @@ def cli_main():
         $ python -m simulstream.metrics.score_quality \\
             --eval-config config/speech-processor.yaml \\
             --log-file metrics.jsonl \\
-            --references ref.en \\
-            --transcripts src.it \\
+            --references 1.en \\
+            --transcripts 1.it \\
             --scorer sacrebleu
     """
     LOGGER.info(f"Simulstream version: {simulstream.__version__}")
@@ -140,14 +140,17 @@ def cli_main():
              "specified, this should be a single file containing all the lines of the audios in "
              "the reference, which should be of the same length of the audio definition. "
              "Otherwise, this should be a list of files, where each contains the lines "
-             "corresponding to an audio file.")
+             "corresponding to an audio file. In the case of being a list of files, the file "
+             "stem must match a corresponding transcript for an audio file (if applicable "
+             "to the quality metric).")
     parser.add_argument(
         "--transcripts", nargs="+", type=str,
         help="Path to the textual files containing reference transcripts. If `--audio-definition` "
              "is specified, this should be a single file containing all the lines of the audios "
              "in the reference, which should be of the same length of the audio definition. "
              "Otherwise, this should be a list of files, where each contains the lines "
-             "corresponding to an audio file.")
+             "corresponding to an audio file. In the case of being a list of files, the file "
+             "stem must match a corresponding reference for an audio file.")
     parser.add_argument(
         "--audio-definition", "-a", type=str, default=None,
         help="Path to the yaml file containing the segment-level audio information.")

From 4e2c1d369a98b05ec3a8bbdd3c9d54f7d4af78a9 Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:19:53 -0800
Subject: [PATCH 3/7] Update README.md with suggested change

Co-authored-by: Marco Gaido <marcogaido91@gmail.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b042b1e..af7e098 100644
--- a/README.md
+++ b/README.md
@@ -198,7 +198,7 @@ the selected metric (``--scorer``).
 
 Similarly, ``simulstream_score_quality`` evaluated the quality
 of the generated outputs against one (or more) reference (and transcript, only for metrics
-requiring them) file(s). For metrics where an audio definition YAML is unnecessary, but a transcript
+requiring them) file(s). If an audio definition YAML is not provided, but a transcript
 is necessary, both the reference and transcript files must have corresponding file stems (e.g.,
 `TEXT_FILE`).
 

From d210b03eb3b43e2f0927b52d80818a31bccd9a2e Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:39:05 -0800
Subject: [PATCH 4/7] Update to README.md with improved clarification for
 quality scorer usage

Updated references in README for simulstream commands and added clarification on audio definitions.
---
 README.md | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index af7e098..d1604e7 100644
--- a/README.md
+++ b/README.md
@@ -177,14 +177,15 @@ can score your speech processor by running:
 simulstream_score_latency --scorer stream_laal \
     --eval-config config/speech_processor.yaml \
     --log-file metrics.jsonl \
-    --reference TEXT_FILE.tgt \
+    --reference REFERENCES_FILE.tgt \
     --audio-definition YAML_AUDIO_REFERENCES_DEFINITION.yaml
 
 simulstream_score_quality --scorer comet \
     --eval-config config/speech_processor.yaml \
     --log-file metrics.jsonl \
-    --references TEXT_FILE.tgt \
-    --transcripts TEXT_FILE.src
+    --references REFERENCES_FILE.tgt \
+    --transcripts TRANSCRIPTS_FILE.src \
+    --audio-definition YAML_AUDIO_REFERENCES_DEFINITION.yaml
 
 simulstream_stats --eval-config config/speech_processor.yaml \
     --log-file metrics.jsonl
@@ -198,9 +199,20 @@ the selected metric (``--scorer``).
 
 Similarly, ``simulstream_score_quality`` evaluated the quality
 of the generated outputs against one (or more) reference (and transcript, only for metrics
-requiring them) file(s). If an audio definition YAML is not provided, but a transcript
-is necessary, both the reference and transcript files must have corresponding file stems (e.g.,
-`TEXT_FILE`).
+requiring them) file(s). Here, the `YAML_AUDIO_REFERENCES_DEFINITION.yaml` has the same number of entries (sentence definitions
+in terms of wav file origin, offset and duration) as `REFERENCES_FILE.tgt` and `TRANSCRIPTS_FILE.src`.
+
+As an alternative, `simulstream_score_quality` can be run without the `--audio-definition` specification, by using a list of
+files as arguments of `--references` and `--transcripts`. In this case, the names of the files (trimmed of the extension)
+**must be the same** as those of the audio files used (i.e. the names present in `metrics.jsonl`). For instance:
+
+```
+simulstream_score_quality --scorer comet \
+    --eval-config config/speech_processor.yaml \
+    --log-file metrics.jsonl \
+    --references AUDIO1.tgt AUDIO2.tgt AUDIO3.tgt \
+    --transcripts AUDIO1.src AUDIO2.src AUDIO3.src
+```
 
 Lastly, ``simulstream_stats`` computes statistics like the computational cost and flickering ratio.
 

From 100551cf2b1a7ef7404b511a2e0c4255961e60b9 Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:43:33 -0800
Subject: [PATCH 5/7] Modify quality scorer docs to clarify usage without audio
 definitions YAML as suggested

Updated the usage examples in the score_quality docstring: the first example now passes `--audio-definition`, and a second example shows how to invoke the script without it by passing per-audio reference and transcript files.
---
 simulstream/metrics/score_quality.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py
index 7208cf1..fdecb62 100644
--- a/simulstream/metrics/score_quality.py
+++ b/simulstream/metrics/score_quality.py
@@ -122,8 +122,20 @@ def cli_main():
         $ python -m simulstream.metrics.score_quality \\
             --eval-config config/speech-processor.yaml \\
             --log-file metrics.jsonl \\
-            --references 1.en \\
-            --transcripts 1.it \\
+            --references ref.en \\
+            --transcripts src.it \\
+            --audio-definition audio_def.yaml \\
+            --scorer sacrebleu
+
+    Otherwise, the script can be invoked without specifying the `--audio-definition`, but in this case
+    the name of the refererence and transcript files (trimmed of the extension) must be the same
+    of the audio files used (i.e. the names present in `metrics.jsonl`), e.g.:
+
+        $ python -m simulstream.metrics.score_quality \\
+            --eval-config config/speech-processor.yaml \\
+            --log-file metrics.jsonl \\
+            --references 1.en 2.en \\
+            --transcripts 1.it 2.it \\
             --scorer sacrebleu
     """
     LOGGER.info(f"Simulstream version: {simulstream.__version__}")

From 788969e056359b383285048169de13ddf6d54ad2 Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:47:36 -0800
Subject: [PATCH 6/7] Modified scorer docs to respect linter column/width limit

---
 simulstream/metrics/score_quality.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py
index fdecb62..8f9d475 100644
--- a/simulstream/metrics/score_quality.py
+++ b/simulstream/metrics/score_quality.py
@@ -127,9 +127,10 @@ def cli_main():
             --audio-definition audio_def.yaml \\
             --scorer sacrebleu
 
-    Otherwise, the script can be invoked without specifying the `--audio-definition`, but in this case
-    the name of the refererence and transcript files (trimmed of the extension) must be the same
-    of the audio files used (i.e. the names present in `metrics.jsonl`), e.g.:
+    Otherwise, the script can be invoked without specifying the `--audio-definition`, 
+    but in this case the name of the refererence and transcript files (trimmed of 
+    the extension) must be the same of the audio files used (i.e. the names present 
+    in `metrics.jsonl`), e.g.:
 
         $ python -m simulstream.metrics.score_quality \\
             --eval-config config/speech-processor.yaml \\

From ad68791de974a8b73c543b54915fe608f2535056 Mon Sep 17 00:00:00 2001
From: Victor Agostinelli <35327788+agostinv@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:53:38 -0800
Subject: [PATCH 7/7] Removed trailing white space in quality scorer

---
 simulstream/metrics/score_quality.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/simulstream/metrics/score_quality.py b/simulstream/metrics/score_quality.py
index 8f9d475..6479f98 100644
--- a/simulstream/metrics/score_quality.py
+++ b/simulstream/metrics/score_quality.py
@@ -127,9 +127,9 @@ def cli_main():
             --audio-definition audio_def.yaml \\
             --scorer sacrebleu
 
-    Otherwise, the script can be invoked without specifying the `--audio-definition`, 
-    but in this case the name of the refererence and transcript files (trimmed of 
-    the extension) must be the same of the audio files used (i.e. the names present 
+    Otherwise, the script can be invoked without specifying the `--audio-definition`,
+    but in this case the name of the reference and transcript files (trimmed of
+    the extension) must match that of the audio files used (i.e. the names present
     in `metrics.jsonl`), e.g.:
 
         $ python -m simulstream.metrics.score_quality \\