Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions packages/producer/src/regression-harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ import { createRenderJob, executeRenderJob } from "./services/renderOrchestrator
import { compileForRender } from "./services/htmlCompiler.js";
import { validateCompilation } from "./services/compilationTester.js";
import { extractMediaMetadata } from "./utils/ffprobe.js";
import { buildRmsEnvelope, compareAudioEnvelopes } from "./utils/audioRegression.js";
import {
buildRmsEnvelope,
compareAudioEnvelopes,
computeAudioResidualRmsDb,
} from "./utils/audioRegression.js";
import { parseFps, fpsToNumber } from "@hyperframes/core";
import {
checkDistributedSupport,
Expand All @@ -38,6 +42,15 @@ type TestMetadata = {
maxFrameFailures: number;
minAudioCorrelation: number;
maxAudioLagWindows: number;
/**
* Optional Rio-style residual-RMS check. Subtracts the rendered audio
* from the baseline and reads the residual Overall RMS via `astats`.
* A value of `-50` (Rio's convention) treats residuals at-or-below
* -50 dBFS as effectively-silent — i.e. the streams are sample-level
* equivalent. Omit (undefined) to skip the check; in-process renders
* authored before this field was introduced have implicit `undefined`.
*/
maxAudioResidualRmsDb?: number;
renderConfig: {
/**
* Frame rate. Stored on disk as a JSON number (integer fps, e.g. `30`)
Expand Down Expand Up @@ -229,6 +242,12 @@ function validateMetadata(meta: unknown): TestMetadata {
if (typeof m.maxAudioLagWindows !== "number" || m.maxAudioLagWindows < 1) {
throw new Error("meta.json: 'maxAudioLagWindows' must be >= 1");
}
if (
m.maxAudioResidualRmsDb !== undefined &&
(typeof m.maxAudioResidualRmsDb !== "number" || !Number.isFinite(m.maxAudioResidualRmsDb))
) {
throw new Error("meta.json: 'maxAudioResidualRmsDb' must be a finite number when present");
}
if (!m.renderConfig || typeof m.renderConfig !== "object") {
throw new Error("meta.json: 'renderConfig' must be an object");
}
Expand Down Expand Up @@ -1051,6 +1070,7 @@ async function runTestSuite(
let audioPassed = true;
let audioCorrelation = 1;
let audioLagWindows = 0;
let audioResidualRmsDb: number | null = null;

if (!isPngSequence) {
logPretty("Comparing audio quality...", "🔊");
Expand All @@ -1068,6 +1088,24 @@ async function runTestSuite(
audioCorrelation = audio.correlation;
audioLagWindows = audio.lagWindows;
audioPassed = audio.correlation >= suite.meta.minAudioCorrelation;

// Rio-style residual RMS check, sample-level rather than
// envelope-level. Only runs when the fixture opts in by
// setting `maxAudioResidualRmsDb`; the envelope-correlation
// gate above stays in place either way for legacy fixtures
// (correlation is shape similarity; residual RMS is exact
// cancellation — both surface different drift classes).
if (suite.meta.maxAudioResidualRmsDb !== undefined) {
const residual = computeAudioResidualRmsDb(
renderedOutputPath,
snapshotVideoPath,
suite.meta.maxAudioResidualRmsDb,
);
audioResidualRmsDb = residual.overallDb;
if (!residual.ok) {
audioPassed = false;
}
}
}
}

Expand All @@ -1084,17 +1122,24 @@ async function runTestSuite(
passed: audioPassed,
correlation: audioCorrelation,
lagWindows: audioLagWindows,
residualRmsDb: audioResidualRmsDb,
}),
);

const residualSuffix =
audioResidualRmsDb === null
? ""
: `, residualRMS: ${
Number.isFinite(audioResidualRmsDb) ? audioResidualRmsDb.toFixed(2) : "-inf"
} dBFS`;
if (audioPassed) {
logPretty(
`Audio quality: PASSED (correlation: ${audioCorrelation.toFixed(3)}, lag: ${audioLagWindows})`,
`Audio quality: PASSED (correlation: ${audioCorrelation.toFixed(3)}, lag: ${audioLagWindows}${residualSuffix})`,
"✓",
);
} else {
logPretty(
`Audio quality: FAILED (correlation: ${audioCorrelation.toFixed(3)}, threshold: ${suite.meta.minAudioCorrelation})`,
`Audio quality: FAILED (correlation: ${audioCorrelation.toFixed(3)}, threshold: ${suite.meta.minAudioCorrelation}${residualSuffix})`,
"✗",
);
}
Expand Down
92 changes: 90 additions & 2 deletions packages/producer/src/utils/audioRegression.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import { describe, expect, it } from "vitest";
import { buildRmsEnvelope, compareAudioEnvelopes } from "./audioRegression.js";
import { spawnSync } from "node:child_process";
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterAll, beforeAll, describe, expect, it } from "vitest";
import {
buildRmsEnvelope,
compareAudioEnvelopes,
computeAudioResidualRmsDb,
} from "./audioRegression.js";

describe("compareAudioEnvelopes", () => {
it("treats silent-vs-silent audio as a perfect match", () => {
Expand All @@ -14,3 +22,83 @@ describe("compareAudioEnvelopes", () => {
});
});
});

// Skip the spawn-based tests entirely on hosts without ffmpeg. The
// regression harness only runs in environments where ffmpeg is present
// (`Dockerfile.test`, dev boxes with apt's ffmpeg), so an absent ffmpeg
// is a developer-laptop fact, not a producer regression.
// Evaluated once when this module loads: a zero exit status from
// `ffmpeg -version` is taken as proof that ffmpeg can be spawned.
const HAS_FFMPEG = spawnSync("ffmpeg", ["-version"], { encoding: "utf-8" }).status === 0;

// Spawn-based suite for computeAudioResidualRmsDb. Fixtures are real wav
// files generated by ffmpeg into a throwaway temp directory, so the
// whole suite is skipped when the ffmpeg probe above fails.
describe.skipIf(!HAS_FFMPEG)("computeAudioResidualRmsDb", () => {
// Scratch directory holding the generated fixtures; assigned in
// beforeAll and removed again in afterAll.
let tmp: string;

beforeAll(() => {
tmp = mkdtempSync(join(tmpdir(), "hf-audio-residual-test-"));
// Three test wavs: two identical 1-second 440 Hz sines (a/b) and one
// 880 Hz sine that differs from the 440 reference. All are 48 kHz,
// 2-channel, 16-bit PCM.
for (const [name, freq] of [
["sine-440-a.wav", 440],
["sine-440-b.wav", 440],
["sine-880.wav", 880],
] as const) {
const result = spawnSync(
"ffmpeg",
[
"-nostdin",
"-v",
"error",
"-f",
"lavfi",
"-i",
`sine=frequency=${freq}:duration=1:sample_rate=48000`,
"-ac",
"2",
"-c:a",
"pcm_s16le",
join(tmp, name),
],
{ encoding: "utf-8" },
);
// Abort the suite loudly if a fixture could not be generated, rather
// than letting the tests run against missing files.
if (result.status !== 0) {
throw new Error(`ffmpeg setup failed for ${name}: ${result.stderr}`);
}
}
});

afterAll(() => {
// `force: true` keeps cleanup from throwing if the directory is already gone.
rmSync(tmp, { recursive: true, force: true });
});

it("returns -inf (or very low dBFS) for two identical streams", () => {
// No threshold argument: the function's default gate applies.
const result = computeAudioResidualRmsDb(
join(tmp, "sine-440-a.wav"),
join(tmp, "sine-440-b.wav"),
);
expect(result.ok).toBe(true);
// 440-vs-440 PCM cancels to silence; ffmpeg reports -inf which we
// normalize to NEGATIVE_INFINITY, OR a value well below -90 if the
// resampler introduces sub-bit-quantization noise.
expect(result.overallDb).toBeLessThan(-80);
});

it("fails when streams are audibly different (440 Hz vs 880 Hz)", () => {
const result = computeAudioResidualRmsDb(
join(tmp, "sine-440-a.wav"),
join(tmp, "sine-880.wav"),
);
expect(result.ok).toBe(false);
// The two tones do not cancel, so the residual stays near the level
// of the inputs themselves — far above the default gate. The -30
// bound only asserts "clearly not silent".
// NOTE(review): lavfi `sine` is documented to generate at amplitude
// 1/8, so expect a reading in the low -20s dBFS rather than near full
// scale — confirm against a real run.
expect(result.overallDb).toBeGreaterThan(-30);
});

it("reports ok=false when an input has no audio stream", () => {
// A bare empty file: ffmpeg can't probe it, so the function reports
// a parse failure (ok=false, NaN). Callers decide whether to treat
// that as a pass (no-audio fixture) or a fail (audio expected).
// NOTE(review): `/dev/null` is POSIX-only; this case presumably needs
// a different empty input on Windows hosts.
const result = computeAudioResidualRmsDb("/dev/null", join(tmp, "sine-440-a.wav"));
expect(result.ok).toBe(false);
expect(Number.isNaN(result.overallDb)).toBe(true);
});
});
131 changes: 131 additions & 0 deletions packages/producer/src/utils/audioRegression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,134 @@ export function compareAudioEnvelopes(

return bestEnvelopeCorrelation(rendered, snapshot, maxLagWindows);
}

// ── Sample-level residual RMS ───────────────────────────────────────────────
//
// Rio-style precise equivalence check: subtract one audio stream from
// the other, run `astats`, read the residual Overall RMS in dBFS.
// Perfectly-equivalent streams produce silence (≤ -90 dBFS in practice
// for AAC-vs-AAC); the Rio convention is `≤ -50 dBFS = effectively
// identical`.
//
// This catches level/phase drift the envelope-correlation check cannot.
// Correlation measures shape similarity at envelope granularity (2048-
// sample windows by default); residual RMS measures sample-level
// cancellation, so it falls out as soon as the two streams disagree by
// a fraction of a sample in alignment or by a fraction of a dB in
// level.
//
// `astats` is invoked via an `ffmpeg` child process spawned synchronously
// (`spawnSync`). We require ffmpeg on PATH — the regression harness
// already requires it for encode + envelope extraction.

import { spawnSync } from "node:child_process";

/**
 * Outcome of a residual-RMS comparison, as returned by
 * {@link computeAudioResidualRmsDb}.
 */
export interface AudioResidualRms {
  /**
   * Residual RMS level in dBFS: the "Overall" reading when one was parsed,
   * otherwise the loudest per-channel reading. Exact cancellation (ffmpeg's
   * literal `-inf`) is surfaced as `Number.NEGATIVE_INFINITY` so callers
   * never have to compare against the string. `NaN` means no reading could
   * be obtained.
   */
  overallDb: number;
  /**
   * `true` only when a reading was obtained and it sits at or below the
   * threshold passed to the helper.
   */
  ok: boolean;
  /**
   * Raw ffmpeg stderr lines mentioning `RMS level` (per-channel and
   * overall), kept verbatim for debugging unexpected drift.
   */
  rmsLines: string[];
}

/**
 * Compute the residual RMS (dBFS) of `rendered - snapshot`.
 *
 * Both inputs are paths to media files containing an audio stream.
 * Each is resampled to 48 kHz stereo with zero-based timestamps, the
 * snapshot is phase-inverted (`volume=-1`), the two are summed via
 * `amix`, and `astats` reports the level of what is left over. The
 * comparison stops at the end of the shorter input
 * (`duration=shortest`).
 *
 * The statistics are accumulated over the whole stream: `astats` is run
 * without `reset`, because a per-frame reset would leave only the final
 * frame in the summary that ffmpeg logs when the filter is torn down.
 *
 * NOTE(review): `amix` is documented to scale each input by 1/inputs
 * unless `normalize=0` is set (an option this graph avoids for ffmpeg
 * 4.x compatibility), so the reading is presumably ~6 dB below the true
 * sample-wise difference — confirm the threshold accounts for that.
 *
 * @param rendered - Path to the freshly rendered media file.
 * @param snapshot - Path to the baseline media file.
 * @param maxResidualRmsDb - Largest residual (dBFS) still considered a
 *   pass. Defaults to `-50` (Rio convention). Pass `Infinity` to compute
 *   the value without gating it: any obtained reading then yields
 *   `ok: true`.
 * @returns The reading, the pass/fail verdict, and the raw `RMS level`
 *   log lines. `{ ok: false, overallDb: NaN }` is returned when ffmpeg
 *   could not be spawned, exited unsuccessfully (e.g. an input lacks an
 *   audio stream), or logged no parseable RMS line — the caller decides
 *   whether that is a pass (no-audio fixture) or a fail (audio expected
 *   but missing).
 */
export function computeAudioResidualRmsDb(
  rendered: string,
  snapshot: string,
  maxResidualRmsDb = -50,
): AudioResidualRms {
  // Align both streams (resample + stereo + zero-based PTS), invert the
  // snapshot, sum via amix, run astats. Avoids amix's `normalize`
  // option (not available on ffmpeg 4.x) — volume=-1 + amix subtracts.
  const filterGraph = [
    "[0:a]aresample=48000,pan=stereo|c0=c0|c1=c1,asetpts=N/SR/TB[a0]",
    "[1:a]aresample=48000,pan=stereo|c0=c0|c1=c1,asetpts=N/SR/TB,volume=-1[a1]",
    "[a0][a1]amix=inputs=2:duration=shortest:dropout_transition=0,astats[out]",
  ].join(";");

  const proc = spawnSync(
    "ffmpeg",
    [
      "-nostdin",
      // Progress lines are never parsed; suppressing them keeps stderr
      // small so the astats summary cannot be lost to the spawn buffer cap.
      "-nostats",
      // astats logs its summary at `info` level, so that level is required.
      "-v",
      "info",
      "-i",
      rendered,
      "-i",
      snapshot,
      "-filter_complex",
      filterGraph,
      "-map",
      "[out]",
      "-f",
      "null",
      "-",
    ],
    { encoding: "utf-8" },
  );

  const stderr = proc.stderr || "";
  // Keep every line that mentions an RMS level for debugging. The
  // patterns below accept `Overall RMS level dB: <n>`, `RMS level dB: <n>`
  // and the unit-less `RMS level: <n>` shapes.
  const rmsLines = stderr.split(/\r?\n/).filter((line) => /RMS level/.test(line));

  // Fail closed: never trust statistics from a run that could not be
  // spawned, was killed (status === null), or exited with an error.
  if (proc.error !== undefined || proc.status !== 0) {
    return { overallDb: Number.NaN, ok: false, rmsLines };
  }

  // Prefer the "Overall" line if it appears; otherwise take the max
  // per-channel RMS (the most pessimistic channel — that's what Rio
  // does as its fallback path).
  const overall = pickRms(rmsLines, /Overall RMS level(?:\s*dB)?:\s*(-?inf|[-\d.]+)/i);
  const channelMax =
    pickRms(rmsLines, /RMS level\s*dB:\s*(-?inf|[-\d.]+)/i, "max") ??
    pickRms(rmsLines, /RMS level:\s*(-?inf|[-\d.]+)/i, "max");

  const value = overall ?? channelMax;
  if (value === null) {
    return { overallDb: Number.NaN, ok: false, rmsLines };
  }
  return {
    overallDb: value,
    ok: value <= maxResidualRmsDb,
    rmsLines,
  };
}

/**
 * Pull an RMS reading (dBFS) out of ffmpeg `astats` log lines.
 *
 * Every line is tested against `re`; capture group 1 must hold either a
 * decimal number or an `inf` token. Tokens are compared
 * case-insensitively. `-inf` (digital silence) becomes
 * `Number.NEGATIVE_INFINITY`; an unsigned `inf` becomes
 * `Number.POSITIVE_INFINITY` so that an "infinitely loud" reading can
 * never be mistaken for silence. Lines that match but carry an
 * unparseable token (e.g. a lone `-`) are skipped.
 *
 * @param lines - Candidate log lines, in output order.
 * @param re - Pattern whose first capture group is the reading.
 * @param mode - `"first"` returns the first parseable reading; `"max"`
 *   returns the largest (loudest) reading across all lines.
 * @returns The selected reading, or `null` when no line yields one.
 */
function pickRms(lines: string[], re: RegExp, mode: "first" | "max" = "first"): number | null {
  let loudest: number | null = null;

  for (const line of lines) {
    // A global/sticky pattern would otherwise resume from the previous
    // line's match offset and silently miss readings.
    re.lastIndex = 0;
    const match = re.exec(line);
    if (!match) continue;

    const token = (match[1] ?? "").trim().toLowerCase();
    let reading: number;
    if (token === "-inf") {
      reading = Number.NEGATIVE_INFINITY;
    } else if (token === "inf") {
      reading = Number.POSITIVE_INFINITY;
    } else {
      reading = Number.parseFloat(token);
    }
    if (Number.isNaN(reading)) continue;

    if (mode === "first") return reading;
    if (loudest === null || reading > loudest) loudest = reading;
  }

  return loudest;
}
Loading