From 0386246bc07b140781f688dfbde04d14d112a5b0 Mon Sep 17 00:00:00 2001 From: James Date: Sun, 17 May 2026 19:02:12 +0000 Subject: [PATCH 1/2] fix(ci): pin chrome-headless-shell to fix regression baseline drift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Dockerfile.test:56` installed `chrome-headless-shell@stable` via `@puppeteer/browsers`. `@stable` is a moving tag, so every Chrome stable bump shifted pixel output enough to fail PSNR on the golden baselines. The regression suite silently broke whenever `Dockerfile.test` was rebuilt against a freshly-promoted stable. Pin to `chrome-headless-shell@148.0.7778.167` — the Chrome 148 stable build that `@stable` currently resolves to, matching what most goldens on `main` were captured against. Comment notes that future bumps must be paired with `docker:test:update` so the pin and the baselines stay in lockstep. Also regenerates the `style-12-prod` golden baseline. PR #918 regenerated it once at b9bdc80d, but that commit landed *before* the `refactor: extract shared inlineSubCompositions from bundler and producer` (581e7a7e) and the linkedom-fragment fix (754b0edc) in the same stack. The compiler refactor changes `__hfRootSelector` from `null` to a scoped `[data-composition-id="..."]` selector in the inlined sub-compositions, which affects the rendered output. style-12-prod was the one fixture in that stack that didn't get a second regen pass after the refactor, so it has been failing on plain `origin/main` (PSNR ~13 from frame 8.26s onward — the mondrian-colors blocks no longer match expected). The new baseline regenerated under this pin passes at PSNR 62-102 dB. 
--- Dockerfile.test | 7 ++++++- .../tests/style-12-prod/output/compiled.html | 18 ++++++++++++------ .../tests/style-12-prod/output/output.mp4 | 4 ++-- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Dockerfile.test b/Dockerfile.test index a20d795b1..d6e83f9c2 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -53,7 +53,12 @@ ENV CONTAINER=true # Install chrome-headless-shell for deterministic BeginFrame rendering. # This lightweight Chrome binary supports HeadlessExperimental.beginFrame. # Install to ~/.cache/puppeteer/ where resolveHeadlessShellPath() looks. -RUN npx --yes @puppeteer/browsers install chrome-headless-shell@stable \ +# +# Pinned to a specific build (NOT @stable) so the regression-test golden +# baselines in packages/producer/tests/*/output/output.mp4 stay reproducible. +# Each Chrome stable bump shifts pixel output enough to fail PSNR. Bump this +# version together with regenerating baselines via `docker:test:update`. +RUN npx --yes @puppeteer/browsers install chrome-headless-shell@148.0.7778.167 \ --path /root/.cache/puppeteer \ && find /root/.cache/puppeteer/chrome-headless-shell -name "chrome-headless-shell" -type f \ && echo "chrome-headless-shell installed" diff --git a/packages/producer/tests/style-12-prod/output/compiled.html b/packages/producer/tests/style-12-prod/output/compiled.html index 93ab16160..aa64d9f9f 100644 --- a/packages/producer/tests/style-12-prod/output/compiled.html +++ b/packages/producer/tests/style-12-prod/output/compiled.html @@ -492,7 +492,7 @@ var __hfEscapeAttr = function(value) { return (value + "").replace(/\\/g, "\\\\").replace(/"/g, "\\\""); }; - var __hfRootSelector = null || (__hfCompId + var __hfRootSelector = "[data-composition-id=\"mondrian-bg\"]" || (__hfCompId ? 
'[data-composition-id="' + __hfEscapeAttr(__hfCompId) + '"]' : ""); var __hfRoot = null; @@ -754,7 +754,8 @@ var __hfRun = function() { try { (function(document, gsap, window, __hyperframes) { -(function () { + + (function () { const tl = gsap.timeline({ paused: true }); tl.to(".bar-1", { left: "100%", duration: 0.45, ease: "expo.out" }, 1.1); @@ -764,6 +765,7 @@ window.__timelines["mondrian-bg"] = tl; })(); + }).call(window, __hfScopedDocument, __hfScopedGsap, __hfScopedWindow, __hfScopedHyperframes); } catch (_err) { console.error(__hfErrorLabel, __hfCompId, _err); @@ -782,7 +784,7 @@ var __hfEscapeAttr = function(value) { return (value + "").replace(/\\/g, "\\\\").replace(/"/g, "\\\""); }; - var __hfRootSelector = null || (__hfCompId + var __hfRootSelector = "[data-composition-id=\"mondrian-colors\"]" || (__hfCompId ? '[data-composition-id="' + __hfEscapeAttr(__hfCompId) + '"]' : ""); var __hfRoot = null; @@ -1044,7 +1046,8 @@ var __hfRun = function() { try { (function(document, gsap, window, __hyperframes) { -(function () { + + (function () { const TRANSCRIPT = [ { text: "We", start: 0.14, end: 0.239 }, { text: "asked", start: 0.28, end: 0.459 }, @@ -1181,6 +1184,7 @@ window.__timelines["mondrian-colors"] = tl; })(); + }).call(window, __hfScopedDocument, __hfScopedGsap, __hfScopedWindow, __hfScopedHyperframes); } catch (_err) { console.error(__hfErrorLabel, __hfCompId, _err); @@ -1199,7 +1203,7 @@ var __hfEscapeAttr = function(value) { return (value + "").replace(/\\/g, "\\\\").replace(/"/g, "\\\""); }; - var __hfRootSelector = null || (__hfCompId + var __hfRootSelector = "[data-composition-id=\"mondrian-captions\"]" || (__hfCompId ? 
'[data-composition-id="' + __hfEscapeAttr(__hfCompId) + '"]' : ""); var __hfRoot = null; @@ -1461,7 +1465,8 @@ var __hfRun = function() { try { (function(document, gsap, window, __hyperframes) { -(function () { + + (function () { const TRANSCRIPT = [ { text: "We", start: 0.14, end: 0.239 }, { text: "asked", start: 0.28, end: 0.459 }, @@ -1551,6 +1556,7 @@ window.__timelines["mondrian-captions"] = tl; })(); + }).call(window, __hfScopedDocument, __hfScopedGsap, __hfScopedWindow, __hfScopedHyperframes); } catch (_err) { console.error(__hfErrorLabel, __hfCompId, _err); diff --git a/packages/producer/tests/style-12-prod/output/output.mp4 b/packages/producer/tests/style-12-prod/output/output.mp4 index 57ffe0b9c..1dce2a6c3 100644 --- a/packages/producer/tests/style-12-prod/output/output.mp4 +++ b/packages/producer/tests/style-12-prod/output/output.mp4 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04de22fa8fe2bd25a40ffb6577fcf6da7d0317a44ed753c291f238f1ae048ccc -size 8714819 +oid sha256:b217384799298e6e828bbeac09dc44c699db64c2902c6af7bc229df5dd73c101 +size 11091047 From bbd8996728e7f21418d0253ec9531344da89d8dc Mon Sep 17 00:00:00 2001 From: James Date: Sun, 17 May 2026 21:36:35 +0000 Subject: [PATCH 2/2] fix(regression-harness): clamp last checkpoint to a valid video frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related fixes pulled out of CI failures on the Chrome pin run: 1. **regression-harness PSNR-parse crash on many-cuts.** Container duration includes audio padding past the last video frame (many-cuts: 5.654s container, 5.6s of video at 30fps = 168 frames). At i=99 the raw container duration mapped to time 5.59746s → frame index 168 (round(5.59746 * 30)), which is one past the last frame the stream contains. ffmpeg's `psnr` filter emits no `average:` line for a non-existent frame, so the harness crashed with `Unable to parse PSNR output at 5.59746s`. 
The fix subtracts one frame interval from the sampling duration so the last checkpoint always lands on a frame the video stream actually contains. PR #918 admin-merged through this same failure on shard-2 (so main is currently red on many-cuts), and Miguel's regen via `--update` didn't catch it because `--update` only writes the snapshot — it doesn't validate. 2. **style-1-prod baseline regen.** Same pattern as style-12-prod: PR #918's regen was done before / between the `refactor: extract shared inlineSubCompositions from bundler and producer` (581e7a7e) and the linkedom-fragment fix (754b0edc), so the committed baseline doesn't match what the compiler now emits. Reproduced locally: frames 14.62s onward fail at PSNR ~10-16 because the graphics sub-composition layer (`#a-roll-frame` overlay) now correctly renders through host duration but was absent in the committed baseline. Regenerated under the Chrome 148.0.7778.167 pin from this PR — now passes at PSNR 53-62 dB across all checkpoints. --- packages/producer/src/regression-harness.ts | 17 ++++++++++------- .../tests/style-1-prod/output/output.mp4 | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/packages/producer/src/regression-harness.ts b/packages/producer/src/regression-harness.ts index 5b9d3c153..ad4f991fc 100644 --- a/packages/producer/src/regression-harness.ts +++ b/packages/producer/src/regression-harness.ts @@ -1079,16 +1079,19 @@ async function runTestSuite( videoMetadata.durationSeconds, snapshotMetadata.durationSeconds, ); + const fps = fpsToNumber(suite.meta.renderConfig.fps); + // Container duration includes audio padding past the last video frame + // (e.g. many-cuts: 5.654s container vs 5.6s of video). At i=99 the + // raw container duration maps to a frame index past nb_frames, and + // ffmpeg's PSNR filter emits no `average:` line for a non-existent + // frame. Subtract one frame interval so the last checkpoint always + // lands on a frame the video stream actually contains. 
+ const sampleDuration = Math.max(0, videoDuration - 1 / fps); const minPsnrForMode = resolveMinPsnrForMode(options.mode, suite.meta.minPsnr); for (let i = 0; i < 100; i++) { - const time = (videoDuration * i) / 100; - const psnr = psnrAtCheckpoint( - renderedOutputPath, - snapshotVideoPath, - time, - fpsToNumber(suite.meta.renderConfig.fps), - ); + const time = (sampleDuration * i) / 100; + const psnr = psnrAtCheckpoint(renderedOutputPath, snapshotVideoPath, time, fps); visualCheckpoints.push({ time, psnr, diff --git a/packages/producer/tests/style-1-prod/output/output.mp4 b/packages/producer/tests/style-1-prod/output/output.mp4 index 65819d2f9..696141418 100644 --- a/packages/producer/tests/style-1-prod/output/output.mp4 +++ b/packages/producer/tests/style-1-prod/output/output.mp4 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6aba60a5a3919132986e785b89f4b3434d23d337d95958a515b1fe2e11906721 -size 5154704 +oid sha256:d2fb75093153c6f03ade0848121b0072a4b91934935cba970d42883d456d9383 +size 8389204