From ade05b6c4ee4c7228be2569ec274c15a53f8109b Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 01:43:58 +0800
Subject: [PATCH 01/20] bench: add benchmarks and results

---
 Cargo.toml     |   5 +++
 README.md      |  28 +++++++++++++
 benches/srt.rs |  85 ++++++++++++++++++++++++++++++++++++-
 benches/vtt.rs | 111 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/srt.rs     |  78 ++++++++++++++++++++--------------
 src/vtt.rs     | 109 ++++++++++++++++++++++++++++--------------------
 6 files changed, 340 insertions(+), 76 deletions(-)
 create mode 100644 benches/vtt.rs

diff --git a/Cargo.toml b/Cargo.toml
index 6f6464b..3189008 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,11 @@ path = "benches/srt.rs"
 name = "srt"
 harness = false
 
+[[bench]]
+path = "benches/vtt.rs"
+name = "vtt"
+harness = false
+
 [features]
 default = ["std"]
 alloc = ["memchr"]
diff --git a/README.md b/README.md
index 45e9e6a..50300a5 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,34 @@ fasrt = "0.2"
 | `alloc`  | No      | Enables `CueText` DOM tree and entity decoding without `std` |
 | `memchr` | Yes (via `alloc`/`std`) | SIMD-accelerated fast path for entity decoding |
 
+## Benchmarks
+
+Measured on Apple Silicon with `cargo bench` (Criterion).
+
+### SRT
+
+| Benchmark | Input | Time | Throughput |
+|-----------|-------|------|------------|
+| Parse (strict) | 2 cues, 89 B | ~316 ns | 281 MiB/s |
+| Parse (strict) | 26 KB file | ~39 µs | 645 MiB/s |
+| Parse (lossy) | 332 files, ~8 MB | ~12.5 ms | 624 MiB/s |
+| Collect into `Vec` | 26 KB file | ~41 µs | 606 MiB/s |
+
+### WebVTT
+
+| Benchmark | Input | Time | Throughput |
+|-----------|-------|------|------------|
+| Parse | 2 cues, 96 B | ~299 ns | 309 MiB/s |
+| Parse | Settings + region + style, 354 B | ~829 ns | 427 MiB/s |
+| Parse | All WPT fixtures, ~34 KB | ~103 µs | 342 MiB/s |
+| Collect into `Vec` | Settings + region + style, 354 B | ~1.08 µs | 329 MiB/s |
+
+Run benchmarks yourself:
+
+```sh
+cargo bench
+```
+
 #### License
 
 `fasrt` is under the terms of both the MIT license and the
diff --git a/benches/srt.rs b/benches/srt.rs
index f328e4d..977ecc8 100644
--- a/benches/srt.rs
+++ b/benches/srt.rs
@@ -1 +1,84 @@
-fn main() {}
+use std::hint::black_box;
+
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use fasrt::srt::Parser;
+
+const SMALL_SRT: &str = "\
+1
+00:00:01,000 --> 00:00:04,000
+Hello world!
+
+2
+00:00:05,000 --> 00:00:08,000
+Goodbye world!
+";
+
+const MEDIUM_SRT: &str = include_str!("../fixtures/srt/DeathNote_01.eng.srt");
+
+fn load_all_fixtures() -> String {
+  let mut buf = String::new();
+  for entry in std::fs::read_dir("fixtures/srt").unwrap() {
+    let entry = entry.unwrap();
+    if entry.path().extension().is_some_and(|e| e == "srt") {
+      if let Ok(content) = std::fs::read_to_string(entry.path()) {
+        buf.push_str(&content);
+      }
+    }
+  }
+  buf
+}
+
+fn bench_srt_parse(c: &mut Criterion) {
+  let all_fixtures = load_all_fixtures();
+
+  let mut group = c.benchmark_group("srt/parse");
+
+  // Small inline SRT
+  group.throughput(Throughput::Bytes(SMALL_SRT.len() as u64));
+  group.bench_function(BenchmarkId::new("strict", "small_2_cues"), |b| {
+    b.iter(|| {
+      let count = Parser::strict(black_box(SMALL_SRT)).count();
+      black_box(count);
+    });
+  });
+
+  // Medium real file (~26 KB)
+  group.throughput(Throughput::Bytes(MEDIUM_SRT.len() as u64));
+  group.bench_function(BenchmarkId::new("strict", "medium_26kb"), |b| {
+    b.iter(|| {
+      let count = Parser::strict(black_box(MEDIUM_SRT)).count();
+      black_box(count);
+    });
+  });
+
+  // All fixtures (~8 MB)
+  group.throughput(Throughput::Bytes(all_fixtures.len() as u64));
+  group.bench_function(BenchmarkId::new("lossy", "all_fixtures_8mb"), |b| {
+    b.iter(|| {
+      let count = Parser::lossy(black_box(&all_fixtures)).count();
+      black_box(count);
+    });
+  });
+
+  group.finish();
+}
+
+fn bench_srt_collect(c: &mut Criterion) {
+  let mut group = c.benchmark_group("srt/collect");
+
+  // Collect into Vec to measure allocation overhead
+  group.throughput(Throughput::Bytes(MEDIUM_SRT.len() as u64));
+  group.bench_function("medium_26kb", |b| {
+    b.iter(|| {
+      let entries: Vec<_> = Parser::strict(black_box(MEDIUM_SRT))
+        .collect::<Result<_, _>>()
+        .unwrap();
+      black_box(entries.len());
+    });
+  });
+
+  group.finish();
+}
+
+criterion_group!(benches, bench_srt_parse, bench_srt_collect);
+criterion_main!(benches);
diff --git a/benches/vtt.rs b/benches/vtt.rs
new file mode 100644
index 0000000..229a32b
--- /dev/null
+++ b/benches/vtt.rs
@@ -0,0 +1,111 @@
+use std::hint::black_box;
+
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use fasrt::vtt::Parser;
+
+const SMALL_VTT: &str = "\
+WEBVTT
+
+00:00:00.000 --> 00:00:01.000
+Hello world!
+
+00:00:01.000 --> 00:00:02.000
+Goodbye world!
+";
+
+const SETTINGS_VTT: &str = "\
+WEBVTT
+
+STYLE
+::cue { color: white; }
+
+REGION
+id:region1
+width:40%
+lines:3
+regionanchor:0%,100%
+viewportanchor:10%,90%
+scroll:up
+
+cue-1
+00:00:00.000 --> 00:00:01.000 align:start position:10% size:80% line:0 vertical:rl region:region1
+<b>Bold</b> and <i>italic</i> text
+
+NOTE This is a comment
+
+00:00:01.000 --> 00:00:05.000
+Second cue with <v Roger Bingham>voice tag</v>
+";
+
+fn load_all_vtt_fixtures() -> String {
+  let mut buf = String::new();
+  for dir in &[
+    "fixtures/webvtt/wpt-file-parsing",
+    "fixtures/webvtt/wpt-cue-parsing",
+  ] {
+    if let Ok(entries) = std::fs::read_dir(dir) {
+      for entry in entries {
+        let entry = entry.unwrap();
+        if entry.path().extension().is_some_and(|e| e == "vtt") {
+          buf.push_str(&std::fs::read_to_string(entry.path()).unwrap());
+          buf.push_str("\n\n");
+        }
+      }
+    }
+  }
+  buf
+}
+
+fn bench_vtt_parse(c: &mut Criterion) {
+  let all_fixtures = load_all_vtt_fixtures();
+
+  let mut group = c.benchmark_group("vtt/parse");
+
+  // Small inline VTT
+  group.throughput(Throughput::Bytes(SMALL_VTT.len() as u64));
+  group.bench_function(BenchmarkId::new("parse", "small_2_cues"), |b| {
+    b.iter(|| {
+      let count = Parser::new(black_box(SMALL_VTT)).count();
+      black_box(count);
+    });
+  });
+
+  // VTT with settings, regions, styles, cue options
+  group.throughput(Throughput::Bytes(SETTINGS_VTT.len() as u64));
+  group.bench_function(BenchmarkId::new("parse", "with_settings"), |b| {
+    b.iter(|| {
+      let count = Parser::new(black_box(SETTINGS_VTT)).count();
+      black_box(count);
+    });
+  });
+
+  // All VTT fixtures
+  group.throughput(Throughput::Bytes(all_fixtures.len() as u64));
+  group.bench_function(BenchmarkId::new("parse", "all_fixtures"), |b| {
+    b.iter(|| {
+      let count = Parser::new(black_box(&all_fixtures)).count();
+      black_box(count);
+    });
+  });
+
+  group.finish();
+}
+
+fn bench_vtt_collect(c: &mut Criterion) {
+  let mut group = c.benchmark_group("vtt/collect");
+
+  group.throughput(Throughput::Bytes(SETTINGS_VTT.len() as u64));
+  group.bench_function("with_settings", |b| {
+    b.iter(|| {
+      let blocks: Vec<_> = Parser::new(black_box(SETTINGS_VTT))
+        .collect::<Result<_, _>>()
+        .unwrap();
+      black_box(blocks.len());
+    });
+  });
+
+  group.finish();
+}
+
+criterion_group!(benches, bench_vtt_parse, bench_vtt_collect);
+criterion_main!(benches);
diff --git a/src/srt.rs b/src/srt.rs
index 29291a2..841e726 100644
--- a/src/srt.rs
+++ b/src/srt.rs
@@ -3,6 +3,7 @@ use core::num::NonZeroU64;
 use logos::{Lexer, Logos};
 
 use crate::error::*;
+use crate::types::{Millisecond, Minute, Second};
 
 pub use types::{Entry, Header, Hour, Timestamp};
 
@@ -322,35 +323,45 @@ fn parse_number(s: &mut Lexer<'_, Token>) -> Result<NonZeroU64, ParseSrtError> {
 
 #[inline]
 fn parse_header(s: &mut Lexer<'_, Token>) -> Result<Header, ParseSrtError> {
-  let s = s.slice().trim();
-  let mut parts = s.split(" --> ");
-  match (parts.next(), parts.next()) {
-    (Some(start), Some(end)) => {
-      let start = parse_timestamp(start)?;
-      let end = parse_timestamp(end)?;
-      Ok(Header::new(start, end))
-    }
-    _ => panic!("logos regex should guarantee this never happens"),
-  }
+  let slice = s.slice();
+  // The regex guarantees `H+:MM:SS,mmm <ws> --> <ws> H+:MM:SS,mmm`.
+  // Find the arrow separator and extract both timestamps via byte arithmetic.
+  let arrow = slice.find("-->").unwrap();
+  let start_str = slice[..arrow].trim();
+  let end_str = slice[arrow + 3..].trim();
+  let start = parse_timestamp_bytes(start_str.as_bytes())?;
+  let end = parse_timestamp_bytes(end_str.as_bytes())?;
+  Ok(Header::new(start, end))
 }
 
+/// Parse an SRT timestamp from raw bytes using direct digit extraction.
+///
+/// Format: `H+:MM:SS,mmm` where H is 2-3 digits (regex-validated),
+/// MM and SS are exactly 2 digits, mmm is exactly 3 digits.
+/// Layout from the end: `mmm`(3) `,`(1) `SS`(2) `:`(1) `MM`(2) `:`(1) = 10 fixed bytes.
 #[inline]
-fn parse_timestamp(s: &str) -> Result<Timestamp, ParseSrtError> {
-  let mut parts = s.split(",");
-
-  match (parts.next(), parts.next()) {
-    (Some(hms), Some(millis)) => {
-      let mut hms_parts = hms.split(':');
-
-      let (h, m, s) = match (hms_parts.next(), hms_parts.next(), hms_parts.next()) {
-        (Some(h), Some(m), Some(s)) => (h.parse()?, m.parse()?, s.parse()?),
-        _ => panic!("logos regex should guarantee this never happens"),
-      };
-      let millis = millis.parse()?;
-      Ok(Timestamp::from_hmsm(h, m, s, millis))
-    }
-    _ => panic!("logos regex should guarantee this never happens"),
-  }
+fn parse_timestamp_bytes(b: &[u8]) -> Result<Timestamp, ParseSrtError> {
+  let len = b.len();
+  let millis = Millisecond(digit3(&b[len - 3..]));
+  let seconds = Second(digit2(&b[len - 6..len - 4]));
+  let minutes = Minute(digit2(&b[len - 9..len - 7]));
+  let hour_len = len - 10;
+  let hours = match hour_len {
+    2 => Hour(digit2(&b[..2]) as u16),
+    3 => Hour(digit3(&b[..3])),
+    _ => return Err(ParseHourError::NotPadded.into()),
+  };
+  Ok(Timestamp::from_hmsm(hours, minutes, seconds, millis))
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn digit2(b: &[u8]) -> u8 {
+  (b[0] - b'0') * 10 + (b[1] - b'0')
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn digit3(b: &[u8]) -> u16 {
+  (b[0] - b'0') as u16 * 100 + (b[1] - b'0') as u16 * 10 + (b[2] - b'0') as u16
 }
 
 struct StateBody {
@@ -736,18 +747,23 @@ impl<'a> Iterator for Lines<'a> {
       return None;
     }
 
-    let remaining = &self.input[self.pos..];
-    let line_end = remaining
-      .find('\n')
+    let bytes = &self.input.as_bytes()[self.pos..];
+
+    #[cfg(feature = "memchr")]
+    let found = memchr::memchr(b'\n', bytes);
+    #[cfg(not(feature = "memchr"))]
+    let found = bytes.iter().position(|&b| b == b'\n');
+
+    let line_end = found
       .map(|i| {
-        let end = if i > 0 && remaining.as_bytes()[i - 1] == b'\r' {
+        let end = if i > 0 && bytes[i - 1] == b'\r' {
           i - 1
         } else {
           i
         };
         (end, i + 1)
       })
-      .unwrap_or((remaining.len(), remaining.len()));
+      .unwrap_or((bytes.len(), bytes.len()));
 
     let line = &self.input[self.pos..self.pos + line_end.0];
     self.pos += line_end.1;
diff --git a/src/vtt.rs b/src/vtt.rs
index 46104dd..ec8ab48 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -1,6 +1,9 @@
 use logos::{Lexer, Logos};
 
-use crate::{error::*, types::Millisecond};
+use crate::{
+  error::*,
+  types::{Millisecond, Minute, Second},
+};
 
 pub use types::{
   Align, Anchor, Block, Cue, CueId, CueOptions, Header, Hour, Line, LineAlign, LineValue,
@@ -104,36 +107,55 @@ fn split_arrow(s: &str) -> Result<(&str, &str), ParseVttError> {
 }
 
 /// Parse a timestamp in either long form `HH:MM:SS.mmm` or short form
-/// `MM:SS.mmm`. Auto-detects by counting `:` separators.
+/// `MM:SS.mmm` using direct byte extraction.
+///
+/// The logos regex guarantees the format, so we can extract fields by
+/// fixed offsets from the end: `mmm`(3) `.`(1) `SS`(2) `:`(1) `MM`(2)
+/// = 9 fixed bytes. If there's a `:`(1) before that, everything before
+/// it is the hours component.
 #[inline]
 pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
-  let (time, ms) = s
-    .split_once('.')
-    .ok_or(ParseVttError::InvalidTimestamp("missing '.'"))?;
-  let millis: Millisecond = ms.parse()?;
-  let mut parts = time.splitn(3, ':');
-  let first = parts
-    .next()
-    .ok_or(ParseVttError::InvalidTimestamp("empty timestamp"))?;
-  let second = parts.next().ok_or(ParseVttError::InvalidTimestamp(
-    "missing minutes or seconds",
-  ))?;
-  match parts.next() {
-    // Long form: HH:MM:SS
-    Some(sec_str) => Ok(Timestamp::from_hmsm(
-      first.parse()?,
-      second.parse()?,
-      sec_str.parse()?,
-      millis,
-    )),
-    // Short form: MM:SS
-    None => Ok(Timestamp::from_hmsm(
-      Hour::new(),
-      first.parse()?,
-      second.parse()?,
-      millis,
-    )),
+  let b = s.as_bytes();
+  let len = b.len();
+  if len < 9 {
+    return Err(ParseVttError::InvalidTimestamp("too short"));
   }
+  let millis = Millisecond(vtt_digit3(&b[len - 3..]));
+  // b[len-4] == b'.'
+  let seconds = Second(vtt_digit2(&b[len - 6..len - 4]));
+  // b[len-7] == b':'
+  let minutes = Minute(vtt_digit2(&b[len - 9..len - 7]));
+  let hours = if len > 9 {
+    // Long form: has hours before the second ':'
+    // b[len-10] == b':'
+    let hour_str = &s[..len - 10];
+    // VTT hours are unbounded u64; parse the variable-length prefix
+    parse_vtt_hour_bytes(hour_str.as_bytes())
+      .map_err(|_| ParseVttError::InvalidTimestamp("invalid hours"))?
+  } else {
+    Hour::new()
+  };
+  Ok(Timestamp::from_hmsm(hours, minutes, seconds, millis))
+}
+
+/// Parse VTT hour bytes (variable-length, unbounded u64).
+#[cfg_attr(not(tarpaulin), inline(always))]
+fn parse_vtt_hour_bytes(b: &[u8]) -> Result<Hour, ParseHourError> {
+  let mut val: u64 = 0;
+  for &byte in b {
+    val = val * 10 + (byte - b'0') as u64;
+  }
+  Ok(Hour(val))
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn vtt_digit2(b: &[u8]) -> u8 {
+  (b[0] - b'0') * 10 + (b[1] - b'0')
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn vtt_digit3(b: &[u8]) -> u16 {
+  (b[0] - b'0') as u16 * 100 + (b[1] - b'0') as u16 * 10 + (b[2] - b'0') as u16
 }
 
 /// Lex the next token from a line. Returns `Ok(None)` when the line
@@ -1253,28 +1275,27 @@ impl<'a> Iterator for Lines<'a> {
     }
 
     let bytes = &self.input.as_bytes()[self.pos..];
-    let mut i = 0;
-    while i < bytes.len() {
-      if bytes[i] == b'\n' {
-        let line = &self.input[self.pos..self.pos + i];
-        self.pos += i + 1;
-        return Some(line);
-      } else if bytes[i] == b'\r' {
+
+    #[cfg(feature = "memchr")]
+    let found = memchr::memchr2(b'\n', b'\r', bytes);
+    #[cfg(not(feature = "memchr"))]
+    let found = bytes.iter().position(|&b| b == b'\n' || b == b'\r');
+
+    match found {
+      Some(i) => {
         let line = &self.input[self.pos..self.pos + i];
-        // Check for CRLF
-        if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
+        if bytes[i] == b'\r' && i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
           self.pos += i + 2;
         } else {
           self.pos += i + 1;
         }
-        return Some(line);
+        Some(line)
+      }
+      None => {
+        let line = &self.input[self.pos..];
+        self.pos = self.input.len();
+        Some(line)
       }
-      i += 1;
     }
-
-    // No line terminator found - rest of input
-    let line = &self.input[self.pos..];
-    self.pos = self.input.len();
-    Some(line)
   }
 }

From 1f89be3ae95041a02c7b8e484b977b84348f7d5f Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 02:11:11 +0800
Subject: [PATCH 02/20] fix comments

---
 benches/srt.rs |  3 +++
 src/srt.rs     |  1 +
 src/vtt.rs     | 24 +++++++++++++++++++-----
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/benches/srt.rs b/benches/srt.rs
index 977ecc8..19ef303 100644
--- a/benches/srt.rs
+++ b/benches/srt.rs
@@ -21,6 +21,9 @@ fn load_all_fixtures() -> String {
     let entry = entry.unwrap();
     if entry.path().extension().is_some_and(|e| e == "srt") {
       if let Ok(content) = std::fs::read_to_string(entry.path()) {
+        if !buf.is_empty() {
+          buf.push_str("\n\n");
+        }
         buf.push_str(&content);
       }
     }
diff --git a/src/srt.rs b/src/srt.rs
index 841e726..e55bd61 100644
--- a/src/srt.rs
+++ b/src/srt.rs
@@ -347,6 +347,7 @@ fn parse_timestamp_bytes(b: &[u8]) -> Result<Timestamp, ParseSrtError> {
   let minutes = Minute(digit2(&b[len - 9..len - 7]));
   let hour_len = len - 10;
   let hours = match hour_len {
+    1 => Hour((b[0] - b'0') as u16),
     2 => Hour(digit2(&b[..2]) as u16),
     3 => Hour(digit3(&b[..3])),
     _ => return Err(ParseHourError::NotPadded.into()),
diff --git a/src/vtt.rs b/src/vtt.rs
index ec8ab48..292a63d 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -120,14 +120,19 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
   if len < 9 {
     return Err(ParseVttError::InvalidTimestamp("too short"));
   }
+  // Validate expected separators before doing byte arithmetic.
+  // This guards against malformed input from cue-text parsing where
+  // the regex only loosely validates the format.
+  if b[len - 4] != b'.' || b[len - 7] != b':' {
+    return Err(ParseVttError::InvalidTimestamp("invalid format"));
+  }
   let millis = Millisecond(vtt_digit3(&b[len - 3..]));
-  // b[len-4] == b'.'
   let seconds = Second(vtt_digit2(&b[len - 6..len - 4]));
-  // b[len-7] == b':'
   let minutes = Minute(vtt_digit2(&b[len - 9..len - 7]));
   let hours = if len > 9 {
-    // Long form: has hours before the second ':'
-    // b[len-10] == b':'
+    if b[len - 10] != b':' {
+      return Err(ParseVttError::InvalidTimestamp("invalid format"));
+    }
     let hour_str = &s[..len - 10];
     // VTT hours are unbounded u64; parse the variable-length prefix
     parse_vtt_hour_bytes(hour_str.as_bytes())
@@ -139,11 +144,20 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
 }
 
 /// Parse VTT hour bytes (variable-length, unbounded u64).
+///
+/// Uses checked arithmetic to prevent overflow on extremely large
+/// hour values from untrusted input.
 #[cfg_attr(not(tarpaulin), inline(always))]
 fn parse_vtt_hour_bytes(b: &[u8]) -> Result<Hour, ParseHourError> {
   let mut val: u64 = 0;
   for &byte in b {
-    val = val * 10 + (byte - b'0') as u64;
+    if !byte.is_ascii_digit() {
+      return Err(ParseHourError::NotPadded);
+    }
+    val = val
+      .checked_mul(10)
+      .and_then(|v| v.checked_add((byte - b'0') as u64))
+      .ok_or(ParseHourError::NotPadded)?;
   }
   Ok(Hour(val))
 }

From 13d0edade6c809beb26e22e99f53aafaa1915df7 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 02:33:48 +0800
Subject: [PATCH 03/20] improve performance

---
 src/srt.rs |  3 +--
 src/vtt.rs | 13 ++++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/srt.rs b/src/srt.rs
index e55bd61..9d133c3 100644
--- a/src/srt.rs
+++ b/src/srt.rs
@@ -294,7 +294,7 @@ impl Default for Options {
 enum Token {
   /// Header "HH:MM:SS,mmm --> HH:MM:SS,mmm"
   #[regex(
-    r"[0-9]{1,3}:[0-5][0-9]:[0-5][0-9],[0-9]{3}[ \t\x0C]+-->[ \t\x0C]+[0-9]{1,3}:[0-5][0-9]:[0-5][0-9],[0-9]{3}",
+    r"[0-9]{2,3}:[0-5][0-9]:[0-5][0-9],[0-9]{3}[ \t\x0C]+-->[ \t\x0C]+[0-9]{2,3}:[0-5][0-9]:[0-5][0-9],[0-9]{3}",
     parse_header
   )]
   Header(Header),
@@ -347,7 +347,6 @@ fn parse_timestamp_bytes(b: &[u8]) -> Result<Timestamp, ParseSrtError> {
   let minutes = Minute(digit2(&b[len - 9..len - 7]));
   let hour_len = len - 10;
   let hours = match hour_len {
-    1 => Hour((b[0] - b'0') as u16),
     2 => Hour(digit2(&b[..2]) as u16),
     3 => Hour(digit3(&b[..3])),
     _ => return Err(ParseHourError::NotPadded.into()),
diff --git a/src/vtt.rs b/src/vtt.rs
index 292a63d..bae0461 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -126,9 +126,16 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
   if b[len - 4] != b'.' || b[len - 7] != b':' {
     return Err(ParseVttError::InvalidTimestamp("invalid format"));
   }
-  let millis = Millisecond(vtt_digit3(&b[len - 3..]));
-  let seconds = Second(vtt_digit2(&b[len - 6..len - 4]));
-  let minutes = Minute(vtt_digit2(&b[len - 9..len - 7]));
+  let millis_val = vtt_digit3(&b[len - 3..]);
+  let seconds_val = vtt_digit2(&b[len - 6..len - 4]);
+  let minutes_val = vtt_digit2(&b[len - 9..len - 7]);
+  // Validate ranges — needed for cue-text path where input is not regex-validated.
+  if seconds_val >= 60 || minutes_val >= 60 {
+    return Err(ParseVttError::InvalidTimestamp("out of range"));
+  }
+  let millis = Millisecond(millis_val);
+  let seconds = Second(seconds_val);
+  let minutes = Minute(minutes_val);
   let hours = if len > 9 {
     if b[len - 10] != b':' {
       return Err(ParseVttError::InvalidTimestamp("invalid format"));

From a2a379e53f82da967ceacbfd51ebda48fdbedbd7 Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 02:42:14 +0800
Subject: [PATCH 04/20] Update benches/vtt.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 benches/vtt.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/benches/vtt.rs b/benches/vtt.rs
index 229a32b..1c02cc2 100644
--- a/benches/vtt.rs
+++ b/benches/vtt.rs
@@ -43,16 +43,20 @@ fn load_all_vtt_fixtures() -> String {
     "fixtures/webvtt/wpt-file-parsing",
     "fixtures/webvtt/wpt-cue-parsing",
   ] {
-    if let Ok(entries) = std::fs::read_dir(dir) {
-      for entry in entries {
-        let entry = entry.unwrap();
-        if entry.path().extension().is_some_and(|e| e == "vtt") {
-          buf.push_str(&std::fs::read_to_string(entry.path()).unwrap());
-          buf.push_str("\n\n");
-        }
+    let entries = std::fs::read_dir(dir)
+      .expect("failed to read VTT fixture directory");
+    for entry in entries {
+      let entry = entry.unwrap();
+      if entry.path().extension().is_some_and(|e| e == "vtt") {
+        buf.push_str(&std::fs::read_to_string(entry.path()).unwrap());
+        buf.push_str("\n\n");
       }
     }
   }
+  assert!(
+    !buf.is_empty(),
+    "No VTT fixtures were loaded from fixtures/webvtt"
+  );
   buf
 }
 

From 30b5c4a19d1c33b255711b51811574044d689988 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 02:51:49 +0800
Subject: [PATCH 05/20] improve performance

---
 src/error.rs |  5 +++++
 src/vtt.rs   | 21 +++++++++++----------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/error.rs b/src/error.rs
index 9dee2bb..f0a5437 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -50,6 +50,11 @@ pub enum ParseHourError {
   /// The hour component is out of range (not between 0-999).
   #[error("hour component must be between 0-999, but was {0}")]
   Overflow(u16),
+  /// The hour component overflowed u64 (VTT unbounded hours).
+  #[error("hour component overflowed")]
+  #[unwrap(ignore)]
+  #[try_unwrap(ignore)]
+  HourOverflow,
   /// Not a valid number.
   #[error(transparent)]
   ParseInt(#[from] ParseIntError),
diff --git a/src/vtt.rs b/src/vtt.rs
index bae0461..314984e 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -129,13 +129,14 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
   let millis_val = vtt_digit3(&b[len - 3..]);
   let seconds_val = vtt_digit2(&b[len - 6..len - 4]);
   let minutes_val = vtt_digit2(&b[len - 9..len - 7]);
-  // Validate ranges — needed for cue-text path where input is not regex-validated.
-  if seconds_val >= 60 || minutes_val >= 60 {
-    return Err(ParseVttError::InvalidTimestamp("out of range"));
-  }
-  let millis = Millisecond(millis_val);
-  let seconds = Second(seconds_val);
-  let minutes = Minute(minutes_val);
+  // Use try_with to validate ranges — needed for cue-text path where input
+  // is not regex-validated.
+  let millis = Millisecond::try_with(millis_val)
+    .ok_or(ParseVttError::InvalidTimestamp("milliseconds out of range"))?;
+  let seconds =
+    Second::try_with(seconds_val).ok_or(ParseVttError::InvalidTimestamp("seconds out of range"))?;
+  let minutes =
+    Minute::try_with(minutes_val).ok_or(ParseVttError::InvalidTimestamp("minutes out of range"))?;
   let hours = if len > 9 {
     if b[len - 10] != b':' {
       return Err(ParseVttError::InvalidTimestamp("invalid format"));
@@ -164,7 +165,7 @@ fn parse_vtt_hour_bytes(b: &[u8]) -> Result<Hour, ParseHourError> {
     val = val
       .checked_mul(10)
       .and_then(|v| v.checked_add((byte - b'0') as u64))
-      .ok_or(ParseHourError::NotPadded)?;
+      .ok_or(ParseHourError::HourOverflow)?;
   }
   Ok(Hour(val))
 }
@@ -1297,9 +1298,9 @@ impl<'a> Iterator for Lines<'a> {
 
     let bytes = &self.input.as_bytes()[self.pos..];
 
-    #[cfg(feature = "memchr")]
+    #[cfg(all(feature = "memchr", miri))]
     let found = memchr::memchr2(b'\n', b'\r', bytes);
-    #[cfg(not(feature = "memchr"))]
+    #[cfg(not(all(feature = "memchr", miri)))]
     let found = bytes.iter().position(|&b| b == b'\n' || b == b'\r');
 
     match found {

From cb9bee5f4a5b88685465457cfd778574823606c0 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 02:57:27 +0800
Subject: [PATCH 06/20] improve performance

---
 benches/vtt.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/benches/vtt.rs b/benches/vtt.rs
index 1c02cc2..a7eb32e 100644
--- a/benches/vtt.rs
+++ b/benches/vtt.rs
@@ -43,8 +43,7 @@ fn load_all_vtt_fixtures() -> String {
     "fixtures/webvtt/wpt-file-parsing",
     "fixtures/webvtt/wpt-cue-parsing",
   ] {
-    let entries = std::fs::read_dir(dir)
-      .expect("failed to read VTT fixture directory");
+    let entries = std::fs::read_dir(dir).expect("failed to read VTT fixture directory");
     for entry in entries {
       let entry = entry.unwrap();
       if entry.path().extension().is_some_and(|e| e == "vtt") {

From da9be044cfc78f6f067f1215d245b967f93429e4 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 03:10:52 +0800
Subject: [PATCH 07/20] improve performance

---
 src/vtt.rs     | 78 +++++++++++++++++++++++++++++++++++++++++++-------
 src/vtt/cue.rs |  8 ++++--
 2 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/src/vtt.rs b/src/vtt.rs
index 314984e..b499ce6 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -113,6 +113,13 @@ fn split_arrow(s: &str) -> Result<(&str, &str), ParseVttError> {
 /// fixed offsets from the end: `mmm`(3) `.`(1) `SS`(2) `:`(1) `MM`(2)
 /// = 9 fixed bytes. If there's a `:`(1) before that, everything before
 /// it is the hours component.
+///
+/// # Safety assumption
+///
+/// Callers **must** ensure that `s` has already been validated by a
+/// logos regex (which guarantees digit/separator placement).
+/// For loosely-validated input (e.g. unterminated cue-text tags), use
+/// [`parse_timestamp_cue`] instead.
 #[inline]
 pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
   let b = s.as_bytes();
@@ -120,17 +127,42 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
   if len < 9 {
     return Err(ParseVttError::InvalidTimestamp("too short"));
   }
-  // Validate expected separators before doing byte arithmetic.
-  // This guards against malformed input from cue-text parsing where
-  // the regex only loosely validates the format.
+  let millis = Millisecond(vtt_digit3(&b[len - 3..]));
+  let seconds = Second(vtt_digit2(&b[len - 6..len - 4]));
+  let minutes = Minute(vtt_digit2(&b[len - 9..len - 7]));
+  let hours = if len > 9 {
+    let hour_str = &s[..len - 10];
+    parse_vtt_hour_bytes(hour_str.as_bytes())
+      .map_err(|_| ParseVttError::InvalidTimestamp("invalid hours"))?
+  } else {
+    Hour::new()
+  };
+  Ok(Timestamp::from_hmsm(hours, minutes, seconds, millis))
+}
+
+/// Parse a timestamp from cue-text, where input is only loosely validated
+/// by the regex `<[0-9:]+\.[0-9]{3}>`.
+///
+/// Unlike [`parse_timestamp`], this validates separators and digit bytes
+/// before doing any arithmetic, so it is safe for untrusted input.
+#[inline]
+pub(crate) fn parse_timestamp_cue(s: &str) -> Result<Timestamp, ParseVttError> {
+  let b = s.as_bytes();
+  let len = b.len();
+  if len < 9 {
+    return Err(ParseVttError::InvalidTimestamp("too short"));
+  }
+  // Validate separators.
   if b[len - 4] != b'.' || b[len - 7] != b':' {
     return Err(ParseVttError::InvalidTimestamp("invalid format"));
   }
-  let millis_val = vtt_digit3(&b[len - 3..]);
-  let seconds_val = vtt_digit2(&b[len - 6..len - 4]);
-  let minutes_val = vtt_digit2(&b[len - 9..len - 7]);
-  // Use try_with to validate ranges — needed for cue-text path where input
-  // is not regex-validated.
+  // Validate digit bytes before arithmetic.
+  let millis_val =
+    vtt_digit3_checked(&b[len - 3..]).ok_or(ParseVttError::InvalidTimestamp("invalid digits"))?;
+  let seconds_val = vtt_digit2_checked(&b[len - 6..len - 4])
+    .ok_or(ParseVttError::InvalidTimestamp("invalid digits"))?;
+  let minutes_val = vtt_digit2_checked(&b[len - 9..len - 7])
+    .ok_or(ParseVttError::InvalidTimestamp("invalid digits"))?;
   let millis = Millisecond::try_with(millis_val)
     .ok_or(ParseVttError::InvalidTimestamp("milliseconds out of range"))?;
   let seconds =
@@ -142,7 +174,6 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
       return Err(ParseVttError::InvalidTimestamp("invalid format"));
     }
     let hour_str = &s[..len - 10];
-    // VTT hours are unbounded u64; parse the variable-length prefix
     parse_vtt_hour_bytes(hour_str.as_bytes())
       .map_err(|_| ParseVttError::InvalidTimestamp("invalid hours"))?
   } else {
@@ -170,16 +201,41 @@ fn parse_vtt_hour_bytes(b: &[u8]) -> Result<Hour, ParseHourError> {
   Ok(Hour(val))
 }
 
+/// Unchecked 2-digit extraction. Caller must guarantee ASCII digits.
 #[cfg_attr(not(tarpaulin), inline(always))]
 const fn vtt_digit2(b: &[u8]) -> u8 {
   (b[0] - b'0') * 10 + (b[1] - b'0')
 }
 
+/// Unchecked 3-digit extraction. Caller must guarantee ASCII digits.
 #[cfg_attr(not(tarpaulin), inline(always))]
 const fn vtt_digit3(b: &[u8]) -> u16 {
   (b[0] - b'0') as u16 * 100 + (b[1] - b'0') as u16 * 10 + (b[2] - b'0') as u16
 }
 
+/// Checked 2-digit extraction. Returns `None` if any byte is not an ASCII digit.
+#[cfg_attr(not(tarpaulin), inline(always))]
+fn vtt_digit2_checked(b: &[u8]) -> Option<u8> {
+  let d0 = b[0].wrapping_sub(b'0');
+  let d1 = b[1].wrapping_sub(b'0');
+  if d0 > 9 || d1 > 9 {
+    return None;
+  }
+  Some(d0 * 10 + d1)
+}
+
+/// Checked 3-digit extraction. Returns `None` if any byte is not an ASCII digit.
+#[cfg_attr(not(tarpaulin), inline(always))]
+fn vtt_digit3_checked(b: &[u8]) -> Option<u16> {
+  let d0 = b[0].wrapping_sub(b'0');
+  let d1 = b[1].wrapping_sub(b'0');
+  let d2 = b[2].wrapping_sub(b'0');
+  if d0 > 9 || d1 > 9 || d2 > 9 {
+    return None;
+  }
+  Some(d0 as u16 * 100 + d1 as u16 * 10 + d2 as u16)
+}
+
 /// Lex the next token from a line. Returns `Ok(None)` when the line
 /// produces no tokens.
 #[cfg_attr(not(tarpaulin), inline(always))]
@@ -1298,9 +1354,9 @@ impl<'a> Iterator for Lines<'a> {
 
     let bytes = &self.input.as_bytes()[self.pos..];
 
-    #[cfg(all(feature = "memchr", miri))]
+    #[cfg(all(feature = "memchr", not(miri)))]
     let found = memchr::memchr2(b'\n', b'\r', bytes);
-    #[cfg(not(all(feature = "memchr", miri)))]
+    #[cfg(any(not(feature = "memchr"), miri))]
     let found = bytes.iter().position(|&b| b == b'\n' || b == b'\r');
 
     match found {
diff --git a/src/vtt/cue.rs b/src/vtt/cue.rs
index 64483d9..a810b86 100644
--- a/src/vtt/cue.rs
+++ b/src/vtt/cue.rs
@@ -95,8 +95,9 @@ enum RawCueToken<'a> {
   StartLang(&'a str),
 
   // ── timestamp tag ─────────────────────────────────────────────────────
-  /// `<[digits/colons].[3 digits]>` — validated by `parse_timestamp` later.
-  #[regex(r"<[0-9:]+\.[0-9]{3}>")]
+  /// `<HH:MM:SS.mmm>` or `<MM:SS.mmm>` — fully validated by the DFA so
+  /// the fast unchecked `parse_timestamp` can be used directly.
+  #[regex(r"<(?:[0-9]+:)?[0-5][0-9]:[0-5][0-9]\.[0-9]{3}>")]
   Timestamp(&'a str),
 
   // ── fallbacks (skipped by the iterator) ───────────────────────────────
@@ -574,6 +575,7 @@ impl<'a> Iterator for CueParser<'a> {
         // ── timestamp ──
         Ok(RawCueToken::Timestamp(s)) => {
           let content = &s[1..s.len() - 1]; // strip `<` and `>`
+          // Regex already validates format; use unchecked fast path.
           if let Ok(ts) = super::parse_timestamp(content) {
             return Some(CueToken::Timestamp(ts));
           }
@@ -608,7 +610,7 @@ fn try_parse_unterminated<'a>(slice: &'a str) -> Option<CueToken<'a>> {
 
   // Try timestamp: digits/colons + "." + 3 digits
   if inner.as_bytes()[0].is_ascii_digit() {
-    if let Ok(ts) = super::parse_timestamp(inner) {
+    if let Ok(ts) = super::parse_timestamp_cue(inner) {
       return Some(CueToken::Timestamp(ts));
     }
     return None;

From 3f6146bddc718f9f20d402ffd9757b68463c66f6 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 03:19:32 +0800
Subject: [PATCH 08/20] improve performance

---
 benches/vtt.rs | 58 +++++++++++++++++++++++++++++++++++++++++++++++++-
 src/srt.rs     |  4 ++--
 2 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/benches/vtt.rs b/benches/vtt.rs
index a7eb32e..b3e6a61 100644
--- a/benches/vtt.rs
+++ b/benches/vtt.rs
@@ -2,6 +2,7 @@ use std::hint::black_box;
 
 use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
 use fasrt::vtt::Parser;
+use fasrt::vtt::cue::CueParser;
 
 const SMALL_VTT: &str = "\
 WEBVTT
@@ -110,5 +111,60 @@ fn bench_vtt_collect(c: &mut Criterion) {
   group.finish();
 }
 
-criterion_group!(benches, bench_vtt_parse, bench_vtt_collect);
+/// Cue text with many timestamp tags to benchmark the cue-text parsing path.
+fn build_cue_text_with_timestamps() -> String {
+  let mut s = String::new();
+  for i in 0..500 {
+    let h = i / 3600;
+    let m = (i % 3600) / 60;
+    let sec = i % 60;
+    s.push_str(&format!(
+      "word <{h:02}:{m:02}:{sec:02}.{ms:03}>text",
+      h = h,
+      m = m,
+      sec = sec,
+      ms = (i * 7) % 1000
+    ));
+  }
+  s
+}
+
+/// Cue text with tags but no timestamps.
+const CUE_TEXT_TAGS: &str = "\
+<b>bold <i>bold-italic</i></b> plain <u>underline</u> \
+<v Roger>voice</v> <lang en>english</lang> <ruby>base<rt>ruby</rt></ruby> \
+<c.highlight.big>classed</c> &amp; &lt; &gt; &nbsp; end";
+
+fn bench_vtt_cue_text(c: &mut Criterion) {
+  let ts_input = build_cue_text_with_timestamps();
+
+  let mut group = c.benchmark_group("vtt/cue_text");
+
+  // Tags only (no timestamps)
+  group.throughput(Throughput::Bytes(CUE_TEXT_TAGS.len() as u64));
+  group.bench_function("tags_only", |b| {
+    b.iter(|| {
+      let count = CueParser::new(black_box(CUE_TEXT_TAGS)).count();
+      black_box(count);
+    });
+  });
+
+  // Timestamp-heavy cue text
+  group.throughput(Throughput::Bytes(ts_input.len() as u64));
+  group.bench_function("500_timestamps", |b| {
+    b.iter(|| {
+      let count = CueParser::new(black_box(&ts_input)).count();
+      black_box(count);
+    });
+  });
+
+  group.finish();
+}
+
+criterion_group!(
+  benches,
+  bench_vtt_parse,
+  bench_vtt_collect,
+  bench_vtt_cue_text
+);
 criterion_main!(benches);
diff --git a/src/srt.rs b/src/srt.rs
index 9d133c3..1f279eb 100644
--- a/src/srt.rs
+++ b/src/srt.rs
@@ -749,9 +749,9 @@ impl<'a> Iterator for Lines<'a> {
 
     let bytes = &self.input.as_bytes()[self.pos..];
 
-    #[cfg(feature = "memchr")]
+    #[cfg(all(feature = "memchr", not(miri)))]
     let found = memchr::memchr(b'\n', bytes);
-    #[cfg(not(feature = "memchr"))]
+    #[cfg(not(all(feature = "memchr", not(miri))))]
     let found = bytes.iter().position(|&b| b == b'\n');
 
     let line_end = found

From f5e7549fe13629cfc43265484503dcdc17a0d22f Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 10:41:13 +0800
Subject: [PATCH 09/20] improve performance

---
 README.md             | 23 +++++++----
 src/error.rs          | 14 +++++--
 src/vtt.rs            | 12 +++++-
 tests/vtt_cue_text.rs | 90 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 50300a5..8177d26 100644
--- a/README.md
+++ b/README.md
@@ -78,19 +78,26 @@ Measured on Apple Silicon with `cargo bench` (Criterion).
 
 | Benchmark | Input | Time | Throughput |
 |-----------|-------|------|------------|
-| Parse (strict) | 2 cues, 89 B | ~316 ns | 281 MiB/s |
-| Parse (strict) | 26 KB file | ~39 µs | 645 MiB/s |
-| Parse (lossy) | 332 files, ~8 MB | ~12.5 ms | 624 MiB/s |
-| Collect into `Vec` | 26 KB file | ~41 µs | 606 MiB/s |
+| Parse (strict) | 2 cues, 89 B | ~170 ns | 520 MiB/s |
+| Parse (strict) | 26 KB file | ~38 µs | 661 MiB/s |
+| Parse (lossy) | 332 files, ~8 MB | ~12.1 ms | 646 MiB/s |
+| Collect into `Vec` | 26 KB file | ~40 µs | 616 MiB/s |
 
 ### WebVTT
 
 | Benchmark | Input | Time | Throughput |
 |-----------|-------|------|------------|
-| Parse | 2 cues, 96 B | ~299 ns | 309 MiB/s |
-| Parse | Settings + region + style, 354 B | ~829 ns | 427 MiB/s |
-| Parse | All WPT fixtures, ~34 KB | ~103 µs | 342 MiB/s |
-| Collect into `Vec` | Settings + region + style, 354 B | ~1.08 µs | 329 MiB/s |
+| Parse | 2 cues, 96 B | ~318 ns | 291 MiB/s |
+| Parse | Settings + region + style, 354 B | ~915 ns | 387 MiB/s |
+| Parse | All WPT fixtures, ~34 KB | ~113 µs | 314 MiB/s |
+| Collect into `Vec` | Settings + region + style, 354 B | ~973 ns | 364 MiB/s |
+
+### Cue Text
+
+| Benchmark | Input | Time | Throughput |
+|-----------|-------|------|------------|
+| Parse | Tags only, 166 B | ~316 ns | 552 MiB/s |
+| Parse | 500 timestamps, ~11 KB | ~14.1 µs | 776 MiB/s |
 
 Run benchmarks yourself:
 
diff --git a/src/error.rs b/src/error.rs
index f0a5437..c4546fb 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -38,19 +38,27 @@ pub enum ParseSecondError {
 }
 
 /// The error type for parsing hour components of timestamps.
+///
+/// This enum is shared by both SRT and WebVTT parsers:
+/// - **SRT** hours are 2–3 digits (0–999): uses [`NotPadded`](Self::NotPadded)
+///   and [`Overflow(u16)`](Self::Overflow).
+/// - **WebVTT** hours are unbounded (`u64`): uses [`NotPadded`](Self::NotPadded)
+///   for non-digit input and [`HourOverflow`](Self::HourOverflow) when the
+///   value exceeds `u64::MAX`.
 #[derive(Debug, Clone, PartialEq, Eq, IsVariant, Unwrap, TryUnwrap, thiserror::Error)]
 #[unwrap(ref, ref_mut)]
 #[try_unwrap(ref, ref_mut)]
 pub enum ParseHourError {
-  /// The hour component is not zero-padded to 2 digits.
+  /// The hour component is not zero-padded to 2 digits (SRT),
+  /// or contains non-digit characters (VTT).
   #[error("hour component is not zero-padded to 2 digits")]
   #[unwrap(ignore)]
   #[try_unwrap(ignore)]
   NotPadded,
-  /// The hour component is out of range (not between 0-999).
+  /// The hour component is out of the SRT range (0–999).
   #[error("hour component must be between 0-999, but was {0}")]
   Overflow(u16),
-  /// The hour component overflowed u64 (VTT unbounded hours).
+  /// The hour component overflowed `u64` (VTT unbounded hours).
   #[error("hour component overflowed")]
   #[unwrap(ignore)]
   #[try_unwrap(ignore)]
diff --git a/src/vtt.rs b/src/vtt.rs
index b499ce6..0939698 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -202,14 +202,22 @@ fn parse_vtt_hour_bytes(b: &[u8]) -> Result<Hour, ParseHourError> {
 }
 
 /// Unchecked 2-digit extraction. Caller must guarantee ASCII digits.
+///
+/// Only called from [`parse_timestamp`], which requires logos-regex-validated input.
+/// Debug assertions catch misuse during development.
 #[cfg_attr(not(tarpaulin), inline(always))]
-const fn vtt_digit2(b: &[u8]) -> u8 {
+fn vtt_digit2(b: &[u8]) -> u8 {
+  debug_assert!(b[0].is_ascii_digit() && b[1].is_ascii_digit());
   (b[0] - b'0') * 10 + (b[1] - b'0')
 }
 
 /// Unchecked 3-digit extraction. Caller must guarantee ASCII digits.
+///
+/// Only called from [`parse_timestamp`], which requires logos-regex-validated input.
+/// Debug assertions catch misuse during development.
 #[cfg_attr(not(tarpaulin), inline(always))]
-const fn vtt_digit3(b: &[u8]) -> u16 {
+fn vtt_digit3(b: &[u8]) -> u16 {
+  debug_assert!(b[0].is_ascii_digit() && b[1].is_ascii_digit() && b[2].is_ascii_digit());
   (b[0] - b'0') as u16 * 100 + (b[1] - b'0') as u16 * 10 + (b[2] - b'0') as u16
 }
 
diff --git a/tests/vtt_cue_text.rs b/tests/vtt_cue_text.rs
index 446d53e..5a61641 100644
--- a/tests/vtt_cue_text.rs
+++ b/tests/vtt_cue_text.rs
@@ -374,3 +374,93 @@ impl CueTokenExt for CueToken<'_> {
     }
   }
 }
+
+// ── Malformed timestamp rejection tests ──────────────────────────────────────
+//
+// These verify that malformed cue-text timestamp tags are safely rejected
+// (treated as unknown tags or skipped) without panicking, even in debug builds.
+
+/// Colons where digits are expected: `<:::.000>` matches the old loose regex
+/// but is rejected by the tightened DFA.
+#[test]
+fn cue_text_rejects_colons_as_digits() {
+  let tokens: Vec<_> = CueParser::new("<:::.000>").collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "colons-only should not parse as timestamp"
+  );
+}
+
+/// Minutes out of range: `<99:99.000>` — rejected by the DFA (`[0-5][0-9]`).
+#[test]
+fn cue_text_rejects_out_of_range_minutes() {
+  let tokens: Vec<_> = CueParser::new("<99:99.000>").collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "99:99 should not parse as timestamp"
+  );
+}
+
+/// Seconds out of range: `<00:60.000>` — rejected by the DFA.
+#[test]
+fn cue_text_rejects_out_of_range_seconds() {
+  let tokens: Vec<_> = CueParser::new("<00:60.000>").collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "60 seconds should not parse as timestamp"
+  );
+}
+
+/// Non-digit bytes in hour position: `<ab:00:00.000>`.
+#[test]
+fn cue_text_rejects_non_digit_hours() {
+  let tokens: Vec<_> = CueParser::new("<ab:00:00.000>").collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "non-digit hours should not parse as timestamp"
+  );
+}
+
+/// Empty hour prefix: `<:00:00.000>` — colon where a digit is expected.
+#[test]
+fn cue_text_rejects_empty_hour_prefix() {
+  let tokens: Vec<_> = CueParser::new("<:00:00.000>").collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "empty hour prefix should not parse as timestamp"
+  );
+}
+
+/// Very large hours that would overflow u64: 30-digit hour value.
+#[test]
+fn cue_text_rejects_overflowing_hours() {
+  // 30 digits exceeds u64::MAX (20 digits)
+  let tag = format!("<{}:00:00.000>", "9".repeat(30));
+  let tokens: Vec<_> = CueParser::new(&tag).collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "overflowing hours should not parse as timestamp"
+  );
+}
+
+/// Unterminated timestamp tag with valid format goes through `parse_timestamp_cue`.
+#[test]
+fn cue_text_unterminated_valid_timestamp() {
+  // `<00:05.000` without closing `>` — handled by try_parse_unterminated
+  let tokens: Vec<_> = CueParser::new("<00:05.000").collect();
+  assert!(
+    tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "unterminated but valid timestamp should parse"
+  );
+}
+
+/// Unterminated timestamp with invalid digits goes through `parse_timestamp_cue`
+/// and is safely rejected.
+#[test]
+fn cue_text_unterminated_invalid_timestamp() {
+  let tokens: Vec<_> = CueParser::new("<99:99.000").collect();
+  assert!(
+    !tokens.iter().any(|t| matches!(t, CueToken::Timestamp(_))),
+    "unterminated invalid timestamp should be rejected"
+  );
+}

From ca93584b936a5cec3941e8482f6ad00d0ff2eb58 Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 10:52:16 +0800
Subject: [PATCH 10/20] Update benches/vtt.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 benches/vtt.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/benches/vtt.rs b/benches/vtt.rs
index b3e6a61..5414b43 100644
--- a/benches/vtt.rs
+++ b/benches/vtt.rs
@@ -44,9 +44,12 @@ fn load_all_vtt_fixtures() -> String {
     "fixtures/webvtt/wpt-file-parsing",
     "fixtures/webvtt/wpt-cue-parsing",
   ] {
-    let entries = std::fs::read_dir(dir).expect("failed to read VTT fixture directory");
+    let mut entries: Vec<_> = std::fs::read_dir(dir)
+      .expect("failed to read VTT fixture directory")
+      .map(|e| e.unwrap())
+      .collect();
+    entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
     for entry in entries {
-      let entry = entry.unwrap();
       if entry.path().extension().is_some_and(|e| e == "vtt") {
         buf.push_str(&std::fs::read_to_string(entry.path()).unwrap());
         buf.push_str("\n\n");

From 71494a9de745346999bdbb6638e434ceb3ca2291 Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 10:52:38 +0800
Subject: [PATCH 11/20] Update benches/srt.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 benches/srt.rs | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/benches/srt.rs b/benches/srt.rs
index 19ef303..fdf541e 100644
--- a/benches/srt.rs
+++ b/benches/srt.rs
@@ -16,16 +16,28 @@ Goodbye world!
 const MEDIUM_SRT: &str = include_str!("../fixtures/srt/DeathNote_01.eng.srt");
 
 fn load_all_fixtures() -> String {
+  let mut paths: Vec<_> = std::fs::read_dir("fixtures/srt")
+    .unwrap()
+    .filter_map(|entry| {
+      let entry = entry.ok()?;
+      let path = entry.path();
+      if path.extension().is_some_and(|e| e == "srt") {
+        Some(path)
+      } else {
+        None
+      }
+    })
+    .collect();
+
+  paths.sort();
+
   let mut buf = String::new();
-  for entry in std::fs::read_dir("fixtures/srt").unwrap() {
-    let entry = entry.unwrap();
-    if entry.path().extension().is_some_and(|e| e == "srt") {
-      if let Ok(content) = std::fs::read_to_string(entry.path()) {
-        if !buf.is_empty() {
-          buf.push_str("\n\n");
-        }
-        buf.push_str(&content);
+  for path in paths {
+    if let Ok(content) = std::fs::read_to_string(path) {
+      if !buf.is_empty() {
+        buf.push_str("\n\n");
       }
+      buf.push_str(&content);
     }
   }
   buf

From 30ac3eefb6d9ca6a2bfefd0b518cb3a067882bc1 Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 10:53:28 +0800
Subject: [PATCH 12/20] Update src/vtt.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/vtt.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/vtt.rs b/src/vtt.rs
index 0939698..8820de3 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -174,6 +174,9 @@ pub(crate) fn parse_timestamp_cue(s: &str) -> Result<Timestamp, ParseVttError> {
       return Err(ParseVttError::InvalidTimestamp("invalid format"));
     }
     let hour_str = &s[..len - 10];
+    if hour_str.is_empty() {
+      return Err(ParseVttError::InvalidTimestamp("invalid hours"));
+    }
     parse_vtt_hour_bytes(hour_str.as_bytes())
       .map_err(|_| ParseVttError::InvalidTimestamp("invalid hours"))?
   } else {

From d633aeb98932a8a628b14a18507456f0d62409a0 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 10:56:06 +0800
Subject: [PATCH 13/20] update stale docs

---
 src/srt.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/srt.rs b/src/srt.rs
index 1f279eb..7f01ddf 100644
--- a/src/srt.rs
+++ b/src/srt.rs
@@ -324,7 +324,7 @@ fn parse_number(s: &mut Lexer<'_, Token>) -> Result<NonZeroU64, ParseSrtError> {
 #[inline]
 fn parse_header(s: &mut Lexer<'_, Token>) -> Result<Header, ParseSrtError> {
   let slice = s.slice();
-  // The regex guarantees `H+:MM:SS,mmm <ws> --> <ws> H+:MM:SS,mmm`.
+  // The regex guarantees `HH+:MM:SS,mmm <ws> --> <ws> HH+:MM:SS,mmm`.
   // Find the arrow separator and extract both timestamps via byte arithmetic.
   let arrow = slice.find("-->").unwrap();
   let start_str = slice[..arrow].trim();

From 6bb45953d83670ef0ea99e747af50bd1d84ed517 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:04:06 +0800
Subject: [PATCH 14/20] update ci sh

---
 ci/miri_sb.sh | 2 +-
 ci/miri_tb.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/miri_sb.sh b/ci/miri_sb.sh
index cc3c6e0..568741b 100755
--- a/ci/miri_sb.sh
+++ b/ci/miri_sb.sh
@@ -35,4 +35,4 @@ cargo miri setup
 
 export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check"
 
-cargo miri test --all-targets --target "$TARGET"
+cargo miri test --lib --tests --doc --target "$TARGET"
diff --git a/ci/miri_tb.sh b/ci/miri_tb.sh
index 5d374c7..a05e475 100755
--- a/ci/miri_tb.sh
+++ b/ci/miri_tb.sh
@@ -35,4 +35,4 @@ cargo miri setup
 
 export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check -Zmiri-tree-borrows"
 
-cargo miri test --all-targets --target "$TARGET"
+cargo miri test --lib --tests --doc --target "$TARGET"

From 29fa662d698ce3fbc598bf9083d176e6dbb03569 Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:06:34 +0800
Subject: [PATCH 15/20] Update src/error.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/error.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/error.rs b/src/error.rs
index c4546fb..d1e5d8f 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -51,7 +51,7 @@ pub enum ParseSecondError {
 pub enum ParseHourError {
   /// The hour component is not zero-padded to 2 digits (SRT),
   /// or contains non-digit characters (VTT).
-  #[error("hour component is not zero-padded to 2 digits")]
+  #[error("hour component is not zero-padded to 2 digits or contains invalid characters")]
   #[unwrap(ignore)]
   #[try_unwrap(ignore)]
   NotPadded,

From 87a967e0ea37dce8b20c3fa926fa4f22acad2d32 Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:07:14 +0800
Subject: [PATCH 16/20] Update benches/vtt.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 benches/vtt.rs | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/benches/vtt.rs b/benches/vtt.rs
index 5414b43..77bdb05 100644
--- a/benches/vtt.rs
+++ b/benches/vtt.rs
@@ -44,22 +44,27 @@ fn load_all_vtt_fixtures() -> String {
     "fixtures/webvtt/wpt-file-parsing",
     "fixtures/webvtt/wpt-cue-parsing",
   ] {
-    let mut entries: Vec<_> = std::fs::read_dir(dir)
-      .expect("failed to read VTT fixture directory")
-      .map(|e| e.unwrap())
-      .collect();
-    entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
-    for entry in entries {
-      if entry.path().extension().is_some_and(|e| e == "vtt") {
-        buf.push_str(&std::fs::read_to_string(entry.path()).unwrap());
-        buf.push_str("\n\n");
+    if let Ok(read_dir) = std::fs::read_dir(dir) {
+      let mut entries: Vec<_> = read_dir
+        .filter_map(Result::ok)
+        .collect();
+      entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
+      for entry in entries {
+        if entry.path().extension().is_some_and(|e| e == "vtt") {
+          if let Ok(contents) = std::fs::read_to_string(entry.path()) {
+            buf.push_str(&contents);
+            buf.push_str("\n\n");
+          }
+        }
       }
     }
   }
-  assert!(
-    !buf.is_empty(),
-    "No VTT fixtures were loaded from fixtures/webvtt"
-  );
+  if buf.is_empty() {
+    // Fallback to embedded samples so benches still run without fixtures.
+    buf.push_str(SMALL_VTT);
+    buf.push_str("\n\n");
+    buf.push_str(SETTINGS_VTT);
+  }
   buf
 }
 

From a8ed2dc10b252b716a8c6e4b2ac1fe9bf04592b2 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:37:11 +0800
Subject: [PATCH 17/20] cleanup

---
 ci/miri_sb.sh |  4 +++-
 ci/miri_tb.sh |  4 +++-
 src/error.rs  | 20 ++++++++++++++++
 src/vtt.rs    | 66 ++++++++++++++++++++++++++++++---------------------
 4 files changed, 65 insertions(+), 29 deletions(-)

diff --git a/ci/miri_sb.sh b/ci/miri_sb.sh
index 568741b..de9aa95 100755
--- a/ci/miri_sb.sh
+++ b/ci/miri_sb.sh
@@ -35,4 +35,6 @@ cargo miri setup
 
 export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check"
 
-cargo miri test --lib --tests --doc --target "$TARGET"
+cargo miri test --lib --tests --target "$TARGET"
+
+cargo miri test --doc --target "$TARGET"
diff --git a/ci/miri_tb.sh b/ci/miri_tb.sh
index a05e475..a9c8beb 100755
--- a/ci/miri_tb.sh
+++ b/ci/miri_tb.sh
@@ -35,4 +35,6 @@ cargo miri setup
 
 export MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-disable-isolation -Zmiri-symbolic-alignment-check -Zmiri-tree-borrows"
 
-cargo miri test --lib --tests --doc --target "$TARGET"
+cargo miri test --lib --tests --target "$TARGET"
+
+cargo miri test --doc --target "$TARGET"
diff --git a/src/error.rs b/src/error.rs
index d1e5d8f..ad6775a 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -86,6 +86,26 @@ pub enum ParseMillisecondError {
   ParseInt(#[from] ParseIntError),
 }
 
+/// Specific reason why a VTT timestamp has invalid structure.
+///
+/// This covers structural validation errors only (length, separators, digits).
+/// Component range errors (hours, minutes, seconds, milliseconds) are
+/// represented by their dedicated error types ([`ParseHourError`],
+/// [`ParseMinuteError`], [`ParseSecondError`], [`ParseMillisecondError`])
+/// which are separate variants of [`crate::vtt::ParseVttError`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
+pub enum TimestampError {
+  /// The input is too short or has an invalid length for a VTT timestamp.
+  #[error("invalid length")]
+  InvalidLength,
+  /// A separator (`.` or `:`) is not in the expected position.
+  #[error("invalid format")]
+  InvalidFormat,
+  /// One or more digit positions contain non-digit bytes.
+  #[error("invalid digits")]
+  InvalidDigits,
+}
+
 /// The error type for parsing index numbers of subtitles.
 #[derive(Debug, Clone, PartialEq, Eq, IsVariant, Unwrap, TryUnwrap, thiserror::Error)]
 #[unwrap(ref, ref_mut)]
diff --git a/src/vtt.rs b/src/vtt.rs
index 8820de3..8bc9aa3 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -45,7 +45,7 @@ pub enum ParseVttError {
 
   /// A timestamp could not be parsed.
   #[error("invalid timestamp: {0}")]
-  InvalidTimestamp(&'static str),
+  InvalidTimestamp(TimestampError),
 
   /// The timing line is malformed (missing `-->`).
   #[error("invalid timing line: missing '-->' separator")]
@@ -125,15 +125,16 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
   let b = s.as_bytes();
   let len = b.len();
   if len < 9 {
-    return Err(ParseVttError::InvalidTimestamp("too short"));
+    return Err(ParseVttError::InvalidTimestamp(
+      TimestampError::InvalidLength,
+    ));
   }
   let millis = Millisecond(vtt_digit3(&b[len - 3..]));
   let seconds = Second(vtt_digit2(&b[len - 6..len - 4]));
   let minutes = Minute(vtt_digit2(&b[len - 9..len - 7]));
   let hours = if len > 9 {
     let hour_str = &s[..len - 10];
-    parse_vtt_hour_bytes(hour_str.as_bytes())
-      .map_err(|_| ParseVttError::InvalidTimestamp("invalid hours"))?
+    parse_vtt_hour_bytes(hour_str.as_bytes())?
   } else {
     Hour::new()
   };
@@ -149,36 +150,47 @@ pub(crate) fn parse_timestamp(s: &str) -> Result<Timestamp, ParseVttError> {
 pub(crate) fn parse_timestamp_cue(s: &str) -> Result<Timestamp, ParseVttError> {
   let b = s.as_bytes();
   let len = b.len();
-  if len < 9 {
-    return Err(ParseVttError::InvalidTimestamp("too short"));
+  // Short form `MM:SS.mmm` = 9 bytes (0 hour digits).
+  // Long form `H+:MM:SS.mmm` = 12..=29 bytes (1..=20 hour digits).
+  // Reject lengths outside [9, 29] and the gap [10, 11].
+  if len < 9 || len > 29 || (len > 9 && len < 12) {
+    return Err(ParseVttError::InvalidTimestamp(
+      TimestampError::InvalidLength,
+    ));
   }
   // Validate separators.
   if b[len - 4] != b'.' || b[len - 7] != b':' {
-    return Err(ParseVttError::InvalidTimestamp("invalid format"));
+    return Err(ParseVttError::InvalidTimestamp(
+      TimestampError::InvalidFormat,
+    ));
   }
   // Validate digit bytes before arithmetic.
-  let millis_val =
-    vtt_digit3_checked(&b[len - 3..]).ok_or(ParseVttError::InvalidTimestamp("invalid digits"))?;
-  let seconds_val = vtt_digit2_checked(&b[len - 6..len - 4])
-    .ok_or(ParseVttError::InvalidTimestamp("invalid digits"))?;
-  let minutes_val = vtt_digit2_checked(&b[len - 9..len - 7])
-    .ok_or(ParseVttError::InvalidTimestamp("invalid digits"))?;
-  let millis = Millisecond::try_with(millis_val)
-    .ok_or(ParseVttError::InvalidTimestamp("milliseconds out of range"))?;
-  let seconds =
-    Second::try_with(seconds_val).ok_or(ParseVttError::InvalidTimestamp("seconds out of range"))?;
-  let minutes =
-    Minute::try_with(minutes_val).ok_or(ParseVttError::InvalidTimestamp("minutes out of range"))?;
-  let hours = if len > 9 {
+  let millis_val = vtt_digit3_checked(&b[len - 3..]).ok_or(ParseVttError::InvalidTimestamp(
+    TimestampError::InvalidDigits,
+  ))?;
+  let seconds_val = vtt_digit2_checked(&b[len - 6..len - 4]).ok_or(
+    ParseVttError::InvalidTimestamp(TimestampError::InvalidDigits),
+  )?;
+  let minutes_val = vtt_digit2_checked(&b[len - 9..len - 7]).ok_or(
+    ParseVttError::InvalidTimestamp(TimestampError::InvalidDigits),
+  )?;
+  let millis = Millisecond::try_with(millis_val).ok_or(ParseVttError::ParseMillisecond(
+    ParseMillisecondError::Overflow(millis_val),
+  ))?;
+  let seconds = Second::try_with(seconds_val).ok_or(ParseVttError::ParseSecond(
+    ParseSecondError::Overflow(seconds_val),
+  ))?;
+  let minutes = Minute::try_with(minutes_val).ok_or(ParseVttError::ParseMinute(
+    ParseMinuteError::Overflow(minutes_val),
+  ))?;
+  let hours = if len >= 12 {
     if b[len - 10] != b':' {
-      return Err(ParseVttError::InvalidTimestamp("invalid format"));
-    }
-    let hour_str = &s[..len - 10];
-    if hour_str.is_empty() {
-      return Err(ParseVttError::InvalidTimestamp("invalid hours"));
+      return Err(ParseVttError::InvalidTimestamp(
+        TimestampError::InvalidFormat,
+      ));
     }
-    parse_vtt_hour_bytes(hour_str.as_bytes())
-      .map_err(|_| ParseVttError::InvalidTimestamp("invalid hours"))?
+    let hour_bytes = &b[..len - 10];
+    parse_vtt_hour_bytes(hour_bytes)?
   } else {
     Hour::new()
   };

From c36a4cb6278be38646fb0cff16d920ca55cb42f3 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:44:51 +0800
Subject: [PATCH 18/20] cleanup

---
 benches/vtt.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/benches/vtt.rs b/benches/vtt.rs
index 77bdb05..6b6d2d8 100644
--- a/benches/vtt.rs
+++ b/benches/vtt.rs
@@ -45,9 +45,7 @@ fn load_all_vtt_fixtures() -> String {
     "fixtures/webvtt/wpt-cue-parsing",
   ] {
     if let Ok(read_dir) = std::fs::read_dir(dir) {
-      let mut entries: Vec<_> = read_dir
-        .filter_map(Result::ok)
-        .collect();
+      let mut entries: Vec<_> = read_dir.filter_map(Result::ok).collect();
       entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
       for entry in entries {
         if entry.path().extension().is_some_and(|e| e == "vtt") {

From 59ee779e540d797f25972ed91b02d7a695d6f23b Mon Sep 17 00:00:00 2001
From: Al Liu <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:54:53 +0800
Subject: [PATCH 19/20] Update benches/srt.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 benches/srt.rs | 48 ++++++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/benches/srt.rs b/benches/srt.rs
index fdf541e..55bb31e 100644
--- a/benches/srt.rs
+++ b/benches/srt.rs
@@ -16,31 +16,35 @@ Goodbye world!
 const MEDIUM_SRT: &str = include_str!("../fixtures/srt/DeathNote_01.eng.srt");
 
 fn load_all_fixtures() -> String {
-  let mut paths: Vec<_> = std::fs::read_dir("fixtures/srt")
-    .unwrap()
-    .filter_map(|entry| {
-      let entry = entry.ok()?;
-      let path = entry.path();
-      if path.extension().is_some_and(|e| e == "srt") {
-        Some(path)
-      } else {
-        None
+  if let Ok(read_dir) = std::fs::read_dir("fixtures/srt") {
+    let mut paths: Vec<_> = read_dir
+      .filter_map(|entry| {
+        let entry = entry.ok()?;
+        let path = entry.path();
+        if path.extension().is_some_and(|e| e == "srt") {
+          Some(path)
+        } else {
+          None
+        }
+      })
+      .collect();
+
+    paths.sort();
+
+    let mut buf = String::new();
+    for path in paths {
+      if let Ok(content) = std::fs::read_to_string(path) {
+        if !buf.is_empty() {
+          buf.push_str("\n\n");
+        }
+        buf.push_str(&content);
       }
-    })
-    .collect();
-
-  paths.sort();
-
-  let mut buf = String::new();
-  for path in paths {
-    if let Ok(content) = std::fs::read_to_string(path) {
-      if !buf.is_empty() {
-        buf.push_str("\n\n");
-      }
-      buf.push_str(&content);
     }
+    buf
+  } else {
+    // Fallback: use a small embedded sample when fixtures are unavailable.
+    SMALL_SRT.to_string()
   }
-  buf
 }
 
 fn bench_srt_parse(c: &mut Criterion) {

From cef59220d79e3251228377ae609283de2ea8fe18 Mon Sep 17 00:00:00 2001
From: al8n <scygliu1@gmail.com>
Date: Thu, 12 Mar 2026 11:56:19 +0800
Subject: [PATCH 20/20] fix clippy

---
 src/vtt.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vtt.rs b/src/vtt.rs
index 8bc9aa3..52e3cae 100644
--- a/src/vtt.rs
+++ b/src/vtt.rs
@@ -153,7 +153,7 @@ pub(crate) fn parse_timestamp_cue(s: &str) -> Result<Timestamp, ParseVttError> {
   // Short form `MM:SS.mmm` = 9 bytes (0 hour digits).
   // Long form `H+:MM:SS.mmm` = 12..=29 bytes (1..=20 hour digits).
   // Reject lengths outside [9, 29] and the gap [10, 11].
-  if len < 9 || len > 29 || (len > 9 && len < 12) {
+  if !(9..=29).contains(&len) || (len > 9 && len < 12) {
     return Err(ParseVttError::InvalidTimestamp(
       TimestampError::InvalidLength,
     ));