From eb325687e4976e5713962b8d6531ddc0739f88dc Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:03:45 +0000 Subject: [PATCH 01/27] feat: GPU NV12 export path + export-optimized encoder settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major export pipeline speed optimization: 1. GPU NV12 Export Path (Phase 1): - Added render_video_to_channel_nv12() that renders frames and converts RGBA→NV12 on the GPU using the existing compute shader - Export now reads back NV12 (1.5 bytes/pixel) instead of RGBA (4 bytes/pixel) = 62% less GPU→CPU data transfer per frame - Eliminates CPU swscale RGBA→NV12 conversion entirely - Uses with_external_conversion() to skip encoder's internal converter - NV12 frames fed directly to H264 encoder via queue_frame() - Falls back to RGBA path if output dimensions aren't NV12-compatible - Screenshots generated from NV12 using cpu_yuv::nv12_to_rgba_simd() 2. Export-Optimized Encoder Settings (Phase 2): - VideoToolbox: realtime=false, profile=main (was realtime=true, baseline) - NVENC: preset=p5, tune=hq (was p4, tune=ll) - QSV: preset=medium, look_ahead_depth=20 (was faster) - AMF: quality=quality, rc=vbr_peak (was balanced, vbr_latency) - libx264: preset=veryfast (was ultrafast with zerolatency) - These settings optimize for throughput/quality instead of latency 3. Pipeline Tuning (Phase 4): - Increased channel buffer sizes from 8 to 16 for both stages - Allows better pipeline overlap between render/process/encode stages Added Nv12RenderedFrame helper methods (y_plane, uv_plane, clone_metadata_with_data) and NV12→ffmpeg frame conversion utility. 
Co-authored-by: Richie McIlroy --- crates/enc-ffmpeg/src/video/h264.rs | 106 +++++-- crates/export/src/mp4.rs | 388 ++++++++++++++++++++++++- crates/rendering/src/frame_pipeline.rs | 28 ++ crates/rendering/src/lib.rs | 255 ++++++++++++++++ 4 files changed, 741 insertions(+), 36 deletions(-) diff --git a/crates/enc-ffmpeg/src/video/h264.rs b/crates/enc-ffmpeg/src/video/h264.rs index eb793341b8e..29efdb58904 100644 --- a/crates/enc-ffmpeg/src/video/h264.rs +++ b/crates/enc-ffmpeg/src/video/h264.rs @@ -27,6 +27,7 @@ pub struct H264EncoderBuilder { output_size: Option<(u32, u32)>, external_conversion: bool, encoder_priority_override: Option<&'static [&'static str]>, + is_export: bool, } #[derive(Clone, Copy, Debug, PartialEq)] @@ -60,6 +61,7 @@ impl H264EncoderBuilder { output_size: None, external_conversion: false, encoder_priority_override: None, + is_export: false, } } @@ -94,6 +96,11 @@ impl H264EncoderBuilder { self } + pub fn with_export_settings(mut self) -> Self { + self.is_export = true; + self + } + pub fn build( self, output: &mut format::context::Output, @@ -116,8 +123,12 @@ impl H264EncoderBuilder { ); } - let candidates = - get_codec_and_options(&input_config, self.preset, self.encoder_priority_override); + let candidates = get_codec_and_options( + &input_config, + self.preset, + self.encoder_priority_override, + self.is_export, + ); if candidates.is_empty() { return Err(H264EncoderError::CodecNotFound); } @@ -671,6 +682,7 @@ fn get_codec_and_options( config: &VideoInfo, preset: H264Preset, encoder_priority_override: Option<&'static [&'static str]>, + is_export: bool, ) -> Vec<(Codec, Dictionary<'static>)> { let keyframe_interval_secs = DEFAULT_KEYFRAME_INTERVAL_SECS; let denominator = config.frame_rate.denominator(); @@ -695,45 +707,87 @@ fn get_codec_and_options( match *encoder_name { "h264_videotoolbox" => { - options.set("realtime", "true"); - options.set("prio_speed", "true"); - options.set("profile", "baseline"); + if is_export { + 
options.set("realtime", "false"); + options.set("profile", "main"); + options.set("allow_sw", "0"); + } else { + options.set("realtime", "true"); + options.set("prio_speed", "true"); + options.set("profile", "baseline"); + } } "h264_nvenc" => { - options.set("preset", "p4"); - options.set("tune", "ll"); - options.set("rc", "vbr"); - options.set("spatial-aq", "1"); - options.set("temporal-aq", "1"); + if is_export { + options.set("preset", "p5"); + options.set("tune", "hq"); + options.set("rc", "vbr"); + options.set("spatial-aq", "1"); + options.set("temporal-aq", "1"); + options.set("b_ref_mode", "middle"); + } else { + options.set("preset", "p4"); + options.set("tune", "ll"); + options.set("rc", "vbr"); + options.set("spatial-aq", "1"); + options.set("temporal-aq", "1"); + } options.set("g", &keyframe_interval_str); } "h264_qsv" => { - options.set("preset", "faster"); - options.set("look_ahead", "1"); + if is_export { + options.set("preset", "medium"); + options.set("look_ahead", "1"); + options.set("look_ahead_depth", "20"); + } else { + options.set("preset", "faster"); + options.set("look_ahead", "1"); + } options.set("g", &keyframe_interval_str); } "h264_amf" => { - options.set("quality", "balanced"); - options.set("rc", "vbr_latency"); + if is_export { + options.set("quality", "quality"); + options.set("rc", "vbr_peak"); + } else { + options.set("quality", "balanced"); + options.set("rc", "vbr_latency"); + } options.set("g", &keyframe_interval_str); } "h264_mf" => { options.set("hw_encoding", "true"); - options.set("scenario", "4"); - options.set("quality", "1"); + if is_export { + options.set("scenario", "0"); + options.set("quality", "0"); + } else { + options.set("scenario", "4"); + options.set("quality", "1"); + } options.set("g", &keyframe_interval_str); } "libx264" => { - options.set( - "preset", - match preset { - H264Preset::Slow => "slow", - H264Preset::Medium => "medium", - H264Preset::Ultrafast | H264Preset::HighThroughput => "ultrafast", - }, - ); 
- if matches!(preset, H264Preset::Ultrafast | H264Preset::HighThroughput) { - options.set("tune", "zerolatency"); + if is_export { + options.set( + "preset", + match preset { + H264Preset::Slow => "slow", + H264Preset::Medium => "medium", + _ => "veryfast", + }, + ); + } else { + options.set( + "preset", + match preset { + H264Preset::Slow => "slow", + H264Preset::Medium => "medium", + H264Preset::Ultrafast | H264Preset::HighThroughput => "ultrafast", + }, + ); + if matches!(preset, H264Preset::Ultrafast | H264Preset::HighThroughput) { + options.set("tune", "zerolatency"); + } } options.set("vsync", "1"); options.set("g", &keyframe_interval_str); diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index acdc56d290e..51de472956d 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -3,7 +3,7 @@ use cap_editor::{AudioRenderer, get_audio_segments}; use cap_enc_ffmpeg::{AudioEncoder, aac::AACEncoder, h264::H264Encoder, mp4::*}; use cap_media_info::{RawVideoFormat, VideoInfo}; use cap_project::XY; -use cap_rendering::{ProjectUniforms, RenderSegment, RenderedFrame}; +use cap_rendering::{Nv12RenderedFrame, ProjectUniforms, RenderSegment}; use futures::FutureExt; use image::ImageBuffer; use serde::Deserialize; @@ -51,17 +51,11 @@ impl Mp4ExportSettings { pub async fn export( self, base: ExporterBase, - mut on_progress: impl FnMut(u32) -> bool + Send + 'static, + on_progress: impl FnMut(u32) -> bool + Send + 'static, ) -> Result { - let output_path = base.output_path.clone(); - let meta = &base.studio_meta; - info!("Exporting mp4 with settings: {:?}", &self); info!("Expected to render {} frames", base.total_frames(self.fps)); - let (tx_image_data, mut video_rx) = tokio::sync::mpsc::channel::<(RenderedFrame, u32)>(8); - let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(8); - let fps = self.fps; let output_size = ProjectUniforms::get_output_size( @@ -70,6 +64,273 @@ impl Mp4ExportSettings { self.resolution_base, ); + let nv12_compatible 
= output_size.0.is_multiple_of(4) && output_size.1.is_multiple_of(2); + + if nv12_compatible { + info!( + width = output_size.0, + height = output_size.1, + "Using GPU NV12 export path (reduced readback + no CPU swscale)" + ); + self.export_nv12(base, output_size, fps, on_progress).await + } else { + info!( + width = output_size.0, + height = output_size.1, + "Falling back to RGBA export path (dimensions not NV12-compatible)" + ); + self.export_rgba(base, output_size, fps, on_progress).await + } + } + + async fn export_nv12( + self, + base: ExporterBase, + output_size: (u32, u32), + fps: u32, + mut on_progress: impl FnMut(u32) -> bool + Send + 'static, + ) -> Result { + let output_path = base.output_path.clone(); + let meta = &base.studio_meta; + + let (tx_image_data, mut video_rx) = + tokio::sync::mpsc::channel::<(Nv12RenderedFrame, u32)>(16); + let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16); + + let mut video_info = + VideoInfo::from_raw(RawVideoFormat::Nv12, output_size.0, output_size.1, fps); + video_info.time_base = ffmpeg::Rational::new(1, fps as i32); + + let audio_segments = get_audio_segments(&base.segments); + + let mut audio_renderer = audio_segments + .first() + .filter(|_| !base.project_config.audio.mute) + .map(|_| AudioRenderer::new(audio_segments.clone())); + let has_audio = audio_renderer.is_some(); + + let encoder_thread = tokio::task::spawn_blocking(move || { + trace!("Creating MP4File encoder (NV12 path)"); + + let mut encoder = MP4File::init( + "output", + base.output_path.clone(), + |o| { + H264Encoder::builder(video_info) + .with_bpp(self.effective_bpp()) + .with_export_priority() + .with_export_settings() + .with_external_conversion() + .build(o) + }, + |o| { + has_audio.then(|| { + AACEncoder::init(AudioRenderer::info(), o) + .map(|v| v.boxed()) + .map_err(Into::into) + }) + }, + ) + .map_err(|v| v.to_string())?; + + info!("Created MP4File encoder (NV12, external conversion, export settings)"); + + while let Ok(frame) = 
frame_rx.recv() { + encoder + .queue_video_frame(frame.video, Duration::MAX) + .map_err(|err| err.to_string())?; + if let Some(audio) = frame.audio { + encoder.queue_audio_frame(audio); + } + } + + let res = encoder + .finish() + .map_err(|e| format!("Failed to finish encoding: {e}"))?; + + if let Err(e) = res.video_finish { + return Err(format!("Video encoding failed: {e}")); + } + if let Err(e) = res.audio_finish { + return Err(format!("Audio encoding failed: {e}")); + } + + Ok::<_, String>(base.output_path) + }) + .then(|r| async { r.map_err(|e| e.to_string()).and_then(|v| v) }); + + let render_task = tokio::spawn({ + let project = base.project_config.clone(); + let project_path = base.project_path.clone(); + async move { + let mut frame_count = 0; + let mut first_frame_data: Option = None; + let sample_rate = u64::from(AudioRenderer::SAMPLE_RATE); + let fps_u64 = u64::from(fps); + let mut audio_sample_cursor = 0u64; + + let mut consecutive_timeouts = 0u32; + const MAX_CONSECUTIVE_TIMEOUTS: u32 = 3; + + loop { + let timeout_secs = if frame_count == 0 { 120 } else { 90 }; + let (frame, frame_number) = match tokio::time::timeout( + Duration::from_secs(timeout_secs), + video_rx.recv(), + ) + .await + { + Err(_) => { + consecutive_timeouts += 1; + + if consecutive_timeouts >= MAX_CONSECUTIVE_TIMEOUTS { + tracing::error!( + frame_count = frame_count, + timeout_secs = timeout_secs, + consecutive_timeouts = consecutive_timeouts, + "Export render_task timed out {} consecutive times - aborting", + MAX_CONSECUTIVE_TIMEOUTS + ); + return Err(format!( + "Export timed out {MAX_CONSECUTIVE_TIMEOUTS} times consecutively after {timeout_secs}s each waiting for frame {frame_count} - GPU/decoder may be unresponsive" + )); + } + + tracing::warn!( + frame_count = frame_count, + timeout_secs = timeout_secs, + consecutive_timeouts = consecutive_timeouts, + "Frame receive timed out, waiting for next frame..." 
+ ); + continue; + } + Ok(Some(v)) => { + consecutive_timeouts = 0; + v + } + Ok(None) => { + tracing::debug!( + frame_count = frame_count, + "Render channel closed - rendering complete" + ); + break; + } + }; + + if !(on_progress)(frame_count) { + return Err("Export cancelled".to_string()); + } + + if frame_count == 0 { + first_frame_data = Some(FirstFrameNv12 { + data: frame.data.clone(), + width: frame.width, + height: frame.height, + y_stride: frame.y_stride, + }); + if let Some(audio) = &mut audio_renderer { + audio.set_playhead(0.0, &project); + } + } + + let audio_frame = audio_renderer.as_mut().and_then(|audio| { + let n = u64::from(frame_number); + let end = ((n + 1) * sample_rate) / fps_u64; + if end <= audio_sample_cursor { + return None; + } + let pts = audio_sample_cursor as i64; + let samples = (end - audio_sample_cursor) as usize; + audio_sample_cursor = end; + audio.render_frame(samples, &project).map(|mut frame| { + frame.set_pts(Some(pts)); + frame + }) + }); + + let video_frame = nv12_to_ffmpeg_frame(&frame, frame_number as i64); + + if frame_tx + .send(MP4Input { + audio: audio_frame, + video: video_frame, + }) + .is_err() + { + warn!("Renderer task sender dropped. 
Exiting"); + return Ok(()); + } + + frame_count += 1; + } + + drop(frame_tx); + + if let Some(first) = first_frame_data { + let project_path = project_path.clone(); + let screenshot_task = tokio::task::spawn_blocking(move || { + save_screenshot_from_nv12( + &first.data, + first.width, + first.height, + first.y_stride, + &project_path, + ); + }); + + if let Err(e) = screenshot_task.await { + warn!("Screenshot task failed: {e}"); + } + } else { + warn!("No frames were processed, cannot save screenshot or thumbnail"); + } + + Ok::<_, String>(()) + } + }) + .then(|r| async { + r.map_err(|e| e.to_string()) + .and_then(|v| v.map_err(|e| e.to_string())) + }); + + let render_video_task = cap_rendering::render_video_to_channel_nv12( + &base.render_constants, + &base.project_config, + tx_image_data, + &base.recording_meta, + meta, + base.segments + .iter() + .map(|s| RenderSegment { + cursor: s.cursor.clone(), + decoders: s.decoders.clone(), + }) + .collect(), + fps, + self.resolution_base, + &base.recordings, + ) + .then(|v| async { v.map_err(|e| e.to_string()) }); + + tokio::try_join!(encoder_thread, render_video_task, render_task)?; + + Ok(output_path) + } + + async fn export_rgba( + self, + base: ExporterBase, + output_size: (u32, u32), + fps: u32, + mut on_progress: impl FnMut(u32) -> bool + Send + 'static, + ) -> Result { + let output_path = base.output_path.clone(); + let meta = &base.studio_meta; + + let (tx_image_data, mut video_rx) = + tokio::sync::mpsc::channel::<(cap_rendering::RenderedFrame, u32)>(16); + let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16); + let mut video_info = VideoInfo::from_raw(RawVideoFormat::Rgba, output_size.0, output_size.1, fps); video_info.time_base = ffmpeg::Rational::new(1, fps as i32); @@ -83,7 +344,7 @@ impl Mp4ExportSettings { let has_audio = audio_renderer.is_some(); let encoder_thread = tokio::task::spawn_blocking(move || { - trace!("Creating MP4File encoder"); + trace!("Creating MP4File encoder (RGBA fallback)"); let 
mut encoder = MP4File::init( "output", @@ -92,6 +353,7 @@ impl Mp4ExportSettings { H264Encoder::builder(video_info) .with_bpp(self.effective_bpp()) .with_export_priority() + .with_export_settings() .build(o) }, |o| { @@ -104,7 +366,7 @@ impl Mp4ExportSettings { ) .map_err(|v| v.to_string())?; - info!("Created MP4File encoder"); + info!("Created MP4File encoder (RGBA fallback, export settings)"); while let Ok(frame) = frame_rx.recv() { encoder @@ -304,6 +566,112 @@ impl Mp4ExportSettings { } } +struct FirstFrameNv12 { + data: Vec, + width: u32, + height: u32, + y_stride: u32, +} + +fn nv12_to_ffmpeg_frame(frame: &Nv12RenderedFrame, pts: i64) -> ffmpeg::frame::Video { + let width = frame.width; + let height = frame.height; + let y_stride = frame.y_stride; + + let mut video_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + video_frame.set_pts(Some(pts)); + + let y_plane_size = (y_stride as usize) * (height as usize); + let y_src = &frame.data[..y_plane_size.min(frame.data.len())]; + let uv_src = if y_plane_size < frame.data.len() { + &frame.data[y_plane_size..] 
+ } else { + &[] + }; + + let dst_y_stride = video_frame.stride(0); + let dst_uv_stride = video_frame.stride(1); + + if dst_y_stride == y_stride as usize { + let copy_len = y_src.len().min(video_frame.data_mut(0).len()); + video_frame.data_mut(0)[..copy_len].copy_from_slice(&y_src[..copy_len]); + } else { + for row in 0..height as usize { + let src_start = row * y_stride as usize; + let dst_start = row * dst_y_stride; + let copy_width = (width as usize).min(y_stride as usize).min(dst_y_stride); + if src_start + copy_width <= y_src.len() + && dst_start + copy_width <= video_frame.data_mut(0).len() + { + video_frame.data_mut(0)[dst_start..dst_start + copy_width] + .copy_from_slice(&y_src[src_start..src_start + copy_width]); + } + } + } + + let uv_height = height as usize / 2; + let uv_width = width as usize; + if dst_uv_stride == uv_width { + let copy_len = uv_src.len().min(video_frame.data_mut(1).len()); + video_frame.data_mut(1)[..copy_len].copy_from_slice(&uv_src[..copy_len]); + } else { + for row in 0..uv_height { + let src_start = row * uv_width; + let dst_start = row * dst_uv_stride; + let copy_width = uv_width.min(dst_uv_stride); + if src_start + copy_width <= uv_src.len() + && dst_start + copy_width <= video_frame.data_mut(1).len() + { + video_frame.data_mut(1)[dst_start..dst_start + copy_width] + .copy_from_slice(&uv_src[src_start..src_start + copy_width]); + } + } + } + + video_frame +} + +fn save_screenshot_from_nv12( + nv12_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + project_path: &std::path::Path, +) { + let y_plane_size = (y_stride as usize) * (height as usize); + let y_data = &nv12_data[..y_plane_size.min(nv12_data.len())]; + let uv_data = if y_plane_size < nv12_data.len() { + &nv12_data[y_plane_size..] 
+ } else { + return; + }; + + let mut rgba = vec![0u8; (width * height * 4) as usize]; + cap_rendering::cpu_yuv::nv12_to_rgba_simd( + y_data, uv_data, width, height, y_stride, width, &mut rgba, + ); + + let rgb_img = ImageBuffer::, Vec>::from_raw( + width, + height, + rgba.chunks(4) + .flat_map(|chunk| [chunk[0], chunk[1], chunk[2]]) + .collect::>(), + ); + + let Some(rgb_img) = rgb_img else { + return; + }; + + let screenshots_dir = project_path.join("screenshots"); + if std::fs::create_dir_all(&screenshots_dir).is_err() { + return; + } + + let screenshot_path = screenshots_dir.join("display.jpg"); + let _ = rgb_img.save(&screenshot_path); +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index 5b9e207ee24..b1348e93956 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -336,6 +336,34 @@ pub struct Nv12RenderedFrame { pub format: GpuOutputFormat, } +impl Nv12RenderedFrame { + pub fn clone_metadata_with_data(&self) -> Self { + Self { + data: self.data.clone(), + width: self.width, + height: self.height, + y_stride: self.y_stride, + frame_number: self.frame_number, + target_time_ns: self.target_time_ns, + format: self.format, + } + } + + pub fn y_plane(&self) -> &[u8] { + let y_size = (self.y_stride as usize) * (self.height as usize); + &self.data[..y_size.min(self.data.len())] + } + + pub fn uv_plane(&self) -> &[u8] { + let y_size = (self.y_stride as usize) * (self.height as usize); + if y_size < self.data.len() { + &self.data[y_size..] 
+ } else { + &[] + } + } +} + pub struct PendingReadback { rx: oneshot::Receiver>, buffer: Arc, diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index 0d693f35fd6..d18a0b64129 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -271,6 +271,8 @@ pub enum RenderingError { BufferMapFailed(#[from] wgpu::BufferAsyncError), #[error("Sending frame to channel failed")] ChannelSendFrameFailed(#[from] mpsc::error::SendError<(RenderedFrame, u32)>), + #[error("Sending NV12 frame to channel failed")] + ChannelSendNv12FrameFailed(#[from] mpsc::error::SendError<(Nv12RenderedFrame, u32)>), #[error("Failed to load image: {0}")] ImageLoadError(String), #[error("Error polling wgpu: {0}")] @@ -551,6 +553,259 @@ pub async fn render_video_to_channel( Ok(()) } +#[allow(clippy::too_many_arguments)] +pub async fn render_video_to_channel_nv12( + constants: &RenderVideoConstants, + project: &ProjectConfiguration, + sender: mpsc::Sender<(Nv12RenderedFrame, u32)>, + recording_meta: &RecordingMeta, + meta: &StudioRecordingMeta, + render_segments: Vec, + fps: u32, + resolution_base: XY, + recordings: &ProjectRecordingsMeta, +) -> Result<(), RenderingError> { + ffmpeg::init().unwrap(); + + let start_time = Instant::now(); + + let duration = get_duration(recordings, recording_meta, meta, project); + + let total_frames = (fps as f64 * duration).ceil() as u32; + + let cursor_smoothing = + (!project.cursor.raw).then_some(spring_mass_damper::SpringMassDamperSimulationConfig { + tension: project.cursor.tension, + mass: project.cursor.mass, + friction: project.cursor.friction, + }); + + let zoom_focus_interpolators: Vec = render_segments + .iter() + .map(|segment| { + let mut interp = ZoomFocusInterpolator::new( + &segment.cursor, + cursor_smoothing, + project.screen_movement_spring, + duration, + ); + interp.precompute(); + interp + }) + .collect(); + + let mut frame_number = 0; + + let mut frame_renderer = FrameRenderer::new(constants); + + let mut 
layers = RendererLayers::new_with_options( + &constants.device, + &constants.queue, + constants.is_software_adapter, + ); + + if let Some(first_segment) = render_segments.first() { + let (screen_w, screen_h) = first_segment.decoders.screen_video_dimensions(); + let camera_dims = first_segment.decoders.camera_video_dimensions(); + layers.prepare_for_video_dimensions( + &constants.device, + screen_w, + screen_h, + camera_dims.map(|(w, _)| w), + camera_dims.map(|(_, h)| h), + ); + } + + let mut last_successful_frame: Option = None; + let mut consecutive_failures = 0u32; + const MAX_CONSECUTIVE_FAILURES: u32 = 200; + + loop { + if frame_number >= total_frames { + break; + } + + let Some((segment_time, segment)) = + project.get_segment_time(frame_number as f64 / fps as f64) + else { + break; + }; + + let clip_config = project + .clips + .iter() + .find(|v| v.index == segment.recording_clip); + + let current_frame_number = { + let prev = frame_number; + std::mem::replace(&mut frame_number, prev + 1) + }; + + let render_segment = &render_segments[segment.recording_clip as usize]; + + let mut segment_frames = None; + let mut retry_count = 0; + const MAX_RETRIES: u32 = 5; + let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); + + while segment_frames.is_none() && retry_count < MAX_RETRIES { + if retry_count > 0 { + let delay = if is_initial_frame { + 500 * (retry_count as u64 + 1) + } else { + 50 * retry_count as u64 + }; + tokio::time::sleep(std::time::Duration::from_millis(delay)).await; + } + + segment_frames = if is_initial_frame { + render_segment + .decoders + .get_frames_initial( + segment_time as f32, + !project.camera.hide, + clip_config.map(|v| v.offsets).unwrap_or_default(), + ) + .await + } else { + render_segment + .decoders + .get_frames( + segment_time as f32, + !project.camera.hide, + clip_config.map(|v| v.offsets).unwrap_or_default(), + ) + .await + }; + + if segment_frames.is_none() { + retry_count += 1; + if retry_count < 
MAX_RETRIES { + tracing::warn!( + frame_number = current_frame_number, + segment_time = segment_time, + retry_count = retry_count, + is_initial = is_initial_frame, + "Frame decode failed, retrying..." + ); + } + } + } + + let frame = if let Some(segment_frames) = segment_frames { + consecutive_failures = 0; + + let zoom_focus_interp = &zoom_focus_interpolators[segment.recording_clip as usize]; + + let uniforms = ProjectUniforms::new( + constants, + project, + current_frame_number, + fps, + resolution_base, + &render_segment.cursor, + &segment_frames, + duration, + zoom_focus_interp, + ); + + match frame_renderer + .render_nv12( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ) + .await + { + Ok(frame) if frame.width > 0 && frame.height > 0 => { + last_successful_frame = Some(frame.clone_metadata_with_data()); + frame + } + Ok(_) => { + tracing::warn!( + frame_number = current_frame_number, + "Rendered NV12 frame has zero dimensions" + ); + if let Some(ref last_frame) = last_successful_frame { + let mut fallback = last_frame.clone_metadata_with_data(); + fallback.frame_number = current_frame_number; + fallback.target_time_ns = + (current_frame_number as u64 * 1_000_000_000) / fps as u64; + fallback + } else { + continue; + } + } + Err(e) => { + tracing::error!( + frame_number = current_frame_number, + error = %e, + "NV12 frame rendering failed" + ); + if let Some(ref last_frame) = last_successful_frame { + let mut fallback = last_frame.clone_metadata_with_data(); + fallback.frame_number = current_frame_number; + fallback.target_time_ns = + (current_frame_number as u64 * 1_000_000_000) / fps as u64; + fallback + } else { + return Err(e); + } + } + } + } else { + consecutive_failures += 1; + + if consecutive_failures >= MAX_CONSECUTIVE_FAILURES { + tracing::error!( + frame_number = current_frame_number, + consecutive_failures = consecutive_failures, + "Too many consecutive frame failures - aborting export" + ); + return 
Err(RenderingError::FrameDecodeFailed { + frame_number: current_frame_number, + consecutive_failures, + }); + } + + if let Some(ref last_frame) = last_successful_frame { + tracing::warn!( + frame_number = current_frame_number, + segment_time = segment_time, + consecutive_failures = consecutive_failures, + max_retries = MAX_RETRIES, + "Frame decode failed after retries - using previous NV12 frame" + ); + let mut fallback = last_frame.clone_metadata_with_data(); + fallback.frame_number = current_frame_number; + fallback.target_time_ns = + (current_frame_number as u64 * 1_000_000_000) / fps as u64; + fallback + } else { + tracing::error!( + frame_number = current_frame_number, + segment_time = segment_time, + max_retries = MAX_RETRIES, + "First frame decode failed after retries - cannot continue" + ); + continue; + } + }; + + sender.send((frame, current_frame_number)).await?; + } + + let total_time = start_time.elapsed(); + tracing::info!( + frames = frame_number, + elapsed_secs = format!("{:.2}", total_time.as_secs_f32()), + "NV12 render complete" + ); + + Ok(()) +} + pub fn get_duration( recordings: &ProjectRecordingsMeta, recording_meta: &RecordingMeta, From 989eb3ef7dad892b982e08de2ab7a093af907aa3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:09:56 +0000 Subject: [PATCH 02/27] fix: handle RGBA fallback in NV12 export path When the GPU NV12 converter fails (e.g. incompatible dimensions at runtime), finish_encoder_nv12 returns an Nv12RenderedFrame with format=GpuOutputFormat::Rgba. The nv12_to_ffmpeg_frame function now checks this format field and falls back to wrapping as RGBA when needed, preventing corrupted output in edge cases. Also added rgba_video_info for the fallback path to correctly describe RGBA pixel format to the ffmpeg frame wrapper. 
Co-authored-by: Richie McIlroy --- crates/export/src/mp4.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index 51de472956d..6d68b42d10c 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -101,6 +101,10 @@ impl Mp4ExportSettings { VideoInfo::from_raw(RawVideoFormat::Nv12, output_size.0, output_size.1, fps); video_info.time_base = ffmpeg::Rational::new(1, fps as i32); + let mut rgba_video_info = + VideoInfo::from_raw(RawVideoFormat::Rgba, output_size.0, output_size.1, fps); + rgba_video_info.time_base = ffmpeg::Rational::new(1, fps as i32); + let audio_segments = get_audio_segments(&base.segments); let mut audio_renderer = audio_segments @@ -248,7 +252,8 @@ impl Mp4ExportSettings { }) }); - let video_frame = nv12_to_ffmpeg_frame(&frame, frame_number as i64); + let video_frame = + nv12_to_ffmpeg_frame(&frame, frame_number as i64, &rgba_video_info); if frame_tx .send(MP4Input { @@ -573,7 +578,17 @@ struct FirstFrameNv12 { y_stride: u32, } -fn nv12_to_ffmpeg_frame(frame: &Nv12RenderedFrame, pts: i64) -> ffmpeg::frame::Video { +fn nv12_to_ffmpeg_frame( + frame: &Nv12RenderedFrame, + pts: i64, + video_info: &VideoInfo, +) -> ffmpeg::frame::Video { + use cap_rendering::GpuOutputFormat; + + if frame.format == GpuOutputFormat::Rgba { + return video_info.wrap_frame(&frame.data, pts, frame.y_stride as usize); + } + let width = frame.width; let height = frame.height; let y_stride = frame.y_stride; From c6b81be1c5607abe553df981cba121ba18a8890a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:10:26 +0000 Subject: [PATCH 03/27] =?UTF-8?q?fix:=20CPU=20RGBA=E2=86=92NV12=20fallback?= =?UTF-8?q?=20when=20GPU=20conversion=20fails?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the GPU NV12 converter falls back to returning RGBA data (rare edge case), we now convert RGBA→NV12 using ffmpeg's swscale on CPU 
before sending to the encoder. This ensures the encoder always receives NV12 frames since it was configured with external_conversion (no internal converter). Co-authored-by: Richie McIlroy --- crates/export/src/mp4.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index 6d68b42d10c..9673cf17580 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -581,12 +581,33 @@ struct FirstFrameNv12 { fn nv12_to_ffmpeg_frame( frame: &Nv12RenderedFrame, pts: i64, - video_info: &VideoInfo, + rgba_video_info: &VideoInfo, ) -> ffmpeg::frame::Video { use cap_rendering::GpuOutputFormat; if frame.format == GpuOutputFormat::Rgba { - return video_info.wrap_frame(&frame.data, pts, frame.y_stride as usize); + tracing::warn!( + frame_number = frame.frame_number, + "NV12 conversion fell back to RGBA - converting on CPU" + ); + let rgba_frame = rgba_video_info.wrap_frame(&frame.data, pts, frame.y_stride as usize); + let mut converter = ffmpeg::software::scaling::Context::get( + ffmpeg::format::Pixel::RGBA, + frame.width, + frame.height, + ffmpeg::format::Pixel::NV12, + frame.width, + frame.height, + ffmpeg::software::scaling::flag::Flags::FAST_BILINEAR, + ) + .expect("failed to create RGBA→NV12 scaler"); + let mut nv12_frame = + ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, frame.width, frame.height); + converter + .run(&rgba_frame, &mut nv12_frame) + .expect("RGBA→NV12 conversion failed"); + nv12_frame.set_pts(Some(pts)); + return nv12_frame; } let width = frame.width; From aea1455a2b712458f4d1f5bdde6a468d9d49a011 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:12:06 +0000 Subject: [PATCH 04/27] fix: flush pending NV12 frame at end of export render loop The NV12 pipelined readback always has one frame in flight. Without flushing, the last exported frame would be lost. 
Added flush_pipeline_nv12() to FrameRenderer and call it at the end of render_video_to_channel_nv12() to ensure all frames are delivered. Co-authored-by: Richie McIlroy --- crates/rendering/src/lib.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index d18a0b64129..f04015b247a 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -796,6 +796,15 @@ pub async fn render_video_to_channel_nv12( sender.send((frame, current_frame_number)).await?; } + if let Some(Ok(final_frame)) = frame_renderer.flush_pipeline_nv12().await + && final_frame.width > 0 + && final_frame.height > 0 + { + sender + .send((final_frame, frame_number.saturating_sub(1))) + .await?; + } + let total_time = start_time.elapsed(); tracing::info!( frames = frame_number, @@ -2151,6 +2160,14 @@ impl<'a> FrameRenderer<'a> { } } + pub async fn flush_pipeline_nv12( + &mut self, + ) -> Option> { + let nv12_converter = self.nv12_converter.as_mut()?; + let pending = nv12_converter.take_pending()?; + Some(pending.wait(&self.constants.device).await) + } + pub async fn render_nv12( &mut self, segment_frames: DecodedSegmentFrames, From 8e0417b0efbedd9ced80e877b1cd0ca6b55c24b6 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:17:29 +0000 Subject: [PATCH 05/27] improve: robust RGBA fallback + encoder timing instrumentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove panic-prone expect() calls in RGBA→NV12 CPU fallback path; gracefully handle scaler creation and conversion failures - Add encoder thread timing: logs encoded frame count, elapsed time, and effective encode FPS for both NV12 and RGBA paths - These metrics help identify whether the encoder is the bottleneck when profiling export performance Co-authored-by: Richie McIlroy --- crates/export/src/mp4.rs | 57 ++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 11 
deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index 9673cf17580..e6f87924cdb 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -139,6 +139,9 @@ impl Mp4ExportSettings { info!("Created MP4File encoder (NV12, external conversion, export settings)"); + let mut encoded_frames = 0u32; + let encode_start = std::time::Instant::now(); + while let Ok(frame) = frame_rx.recv() { encoder .queue_video_frame(frame.video, Duration::MAX) @@ -146,6 +149,18 @@ impl Mp4ExportSettings { if let Some(audio) = frame.audio { encoder.queue_audio_frame(audio); } + encoded_frames += 1; + } + + let encode_elapsed = encode_start.elapsed(); + if encoded_frames > 0 { + let encode_fps = encoded_frames as f64 / encode_elapsed.as_secs_f64().max(0.001); + info!( + encoded_frames = encoded_frames, + elapsed_secs = format!("{:.2}", encode_elapsed.as_secs_f64()), + encode_fps = format!("{:.1}", encode_fps), + "Encoder thread finished" + ); } let res = encoder @@ -172,7 +187,6 @@ impl Mp4ExportSettings { let sample_rate = u64::from(AudioRenderer::SAMPLE_RATE); let fps_u64 = u64::from(fps); let mut audio_sample_cursor = 0u64; - let mut consecutive_timeouts = 0u32; const MAX_CONSECUTIVE_TIMEOUTS: u32 = 3; @@ -373,6 +387,9 @@ impl Mp4ExportSettings { info!("Created MP4File encoder (RGBA fallback, export settings)"); + let mut encoded_frames = 0u32; + let encode_start = std::time::Instant::now(); + while let Ok(frame) = frame_rx.recv() { encoder .queue_video_frame(frame.video, Duration::MAX) @@ -380,6 +397,18 @@ impl Mp4ExportSettings { if let Some(audio) = frame.audio { encoder.queue_audio_frame(audio); } + encoded_frames += 1; + } + + let encode_elapsed = encode_start.elapsed(); + if encoded_frames > 0 { + let encode_fps = encoded_frames as f64 / encode_elapsed.as_secs_f64().max(0.001); + info!( + encoded_frames = encoded_frames, + elapsed_secs = format!("{:.2}", encode_elapsed.as_secs_f64()), + encode_fps = format!("{:.1}", encode_fps), + "Encoder 
thread finished (RGBA)" + ); } let res = encoder @@ -591,7 +620,8 @@ fn nv12_to_ffmpeg_frame( "NV12 conversion fell back to RGBA - converting on CPU" ); let rgba_frame = rgba_video_info.wrap_frame(&frame.data, pts, frame.y_stride as usize); - let mut converter = ffmpeg::software::scaling::Context::get( + + if let Ok(mut converter) = ffmpeg::software::scaling::Context::get( ffmpeg::format::Pixel::RGBA, frame.width, frame.height, @@ -599,15 +629,20 @@ fn nv12_to_ffmpeg_frame( frame.width, frame.height, ffmpeg::software::scaling::flag::Flags::FAST_BILINEAR, - ) - .expect("failed to create RGBA→NV12 scaler"); - let mut nv12_frame = - ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, frame.width, frame.height); - converter - .run(&rgba_frame, &mut nv12_frame) - .expect("RGBA→NV12 conversion failed"); - nv12_frame.set_pts(Some(pts)); - return nv12_frame; + ) { + let mut nv12_frame = + ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, frame.width, frame.height); + if converter.run(&rgba_frame, &mut nv12_frame).is_ok() { + nv12_frame.set_pts(Some(pts)); + return nv12_frame; + } + } + + tracing::error!( + frame_number = frame.frame_number, + "RGBA to NV12 CPU conversion failed, sending RGBA frame directly" + ); + return rgba_frame; } let width = frame.width; From fc4d7bcdea44698d7b17f5c7de9290becd1b96ff Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:25:27 +0000 Subject: [PATCH 06/27] improve: zero-alloc NV12 frame encoding via reusable frame buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructured the NV12 export path to eliminate per-frame ffmpeg frame allocation (~3.1MB per frame at 1080p): - Encoder thread now owns a single reusable frame::Video (NV12) that is filled from raw NV12 bytes each frame via fill_nv12_frame() - Render task sends raw Nv12ExportFrame (Vec + metadata) through the sync_channel instead of pre-built frame::Video objects - Added MP4File::queue_video_frame_reusable() 
that delegates to H264Encoder::queue_frame_reusable() for zero-allocation encoding - Added ensure_nv12_data() for CPU RGBA→NV12 fallback when GPU converter returns RGBA data (extremely rare edge case) This eliminates one heap allocation + zeroing per frame in the hot encoding path, reducing memory pressure and improving cache locality. Co-authored-by: Richie McIlroy --- crates/enc-ffmpeg/src/mux/mp4.rs | 14 +++ crates/export/src/mp4.rs | 190 +++++++++++++++++++------------ 2 files changed, 130 insertions(+), 74 deletions(-) diff --git a/crates/enc-ffmpeg/src/mux/mp4.rs b/crates/enc-ffmpeg/src/mux/mp4.rs index 023564b8e10..882c06d96ed 100644 --- a/crates/enc-ffmpeg/src/mux/mp4.rs +++ b/crates/enc-ffmpeg/src/mux/mp4.rs @@ -96,6 +96,20 @@ impl MP4File { self.video.queue_frame(frame, timestamp, &mut self.output) } + pub fn queue_video_frame_reusable( + &mut self, + frame: &mut frame::Video, + converted_frame: &mut Option, + timestamp: Duration, + ) -> Result<(), h264::QueueFrameError> { + if self.is_finished { + return Ok(()); + } + + self.video + .queue_frame_reusable(frame, converted_frame, timestamp, &mut self.output) + } + pub fn queue_audio_frame(&mut self, frame: frame::Audio) { if self.is_finished { return; diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index e6f87924cdb..b633a24ea43 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -95,16 +95,12 @@ impl Mp4ExportSettings { let (tx_image_data, mut video_rx) = tokio::sync::mpsc::channel::<(Nv12RenderedFrame, u32)>(16); - let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16); + let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16); let mut video_info = VideoInfo::from_raw(RawVideoFormat::Nv12, output_size.0, output_size.1, fps); video_info.time_base = ffmpeg::Rational::new(1, fps as i32); - let mut rgba_video_info = - VideoInfo::from_raw(RawVideoFormat::Rgba, output_size.0, output_size.1, fps); - rgba_video_info.time_base = ffmpeg::Rational::new(1, fps as 
i32); - let audio_segments = get_audio_segments(&base.segments); let mut audio_renderer = audio_segments @@ -139,14 +135,25 @@ impl Mp4ExportSettings { info!("Created MP4File encoder (NV12, external conversion, export settings)"); + let mut reusable_frame = ffmpeg::frame::Video::new( + ffmpeg::format::Pixel::NV12, + output_size.0, + output_size.1, + ); + let mut converted_frame: Option = None; let mut encoded_frames = 0u32; let encode_start = std::time::Instant::now(); - while let Ok(frame) = frame_rx.recv() { + while let Ok(input) = frame_rx.recv() { + fill_nv12_frame(&mut reusable_frame, &input); encoder - .queue_video_frame(frame.video, Duration::MAX) + .queue_video_frame_reusable( + &mut reusable_frame, + &mut converted_frame, + Duration::MAX, + ) .map_err(|err| err.to_string())?; - if let Some(audio) = frame.audio { + if let Some(audio) = input.audio { encoder.queue_audio_frame(audio); } encoded_frames += 1; @@ -266,13 +273,16 @@ impl Mp4ExportSettings { }) }); - let video_frame = - nv12_to_ffmpeg_frame(&frame, frame_number as i64, &rgba_video_info); + let nv12_data = ensure_nv12_data(frame); if frame_tx - .send(MP4Input { + .send(Nv12ExportFrame { audio: audio_frame, - video: video_frame, + nv12_data, + width: output_size.0, + height: output_size.1, + y_stride: output_size.0, + pts: frame_number as i64, }) .is_err() { @@ -607,99 +617,131 @@ struct FirstFrameNv12 { y_stride: u32, } -fn nv12_to_ffmpeg_frame( - frame: &Nv12RenderedFrame, +struct Nv12ExportFrame { + nv12_data: Vec, + width: u32, + height: u32, + y_stride: u32, pts: i64, - rgba_video_info: &VideoInfo, -) -> ffmpeg::frame::Video { - use cap_rendering::GpuOutputFormat; + audio: Option, +} - if frame.format == GpuOutputFormat::Rgba { - tracing::warn!( - frame_number = frame.frame_number, - "NV12 conversion fell back to RGBA - converting on CPU" - ); - let rgba_frame = rgba_video_info.wrap_frame(&frame.data, pts, frame.y_stride as usize); - - if let Ok(mut converter) = 
ffmpeg::software::scaling::Context::get( - ffmpeg::format::Pixel::RGBA, - frame.width, - frame.height, - ffmpeg::format::Pixel::NV12, - frame.width, - frame.height, - ffmpeg::software::scaling::flag::Flags::FAST_BILINEAR, - ) { - let mut nv12_frame = - ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, frame.width, frame.height); - if converter.run(&rgba_frame, &mut nv12_frame).is_ok() { - nv12_frame.set_pts(Some(pts)); - return nv12_frame; - } - } +fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Vec { + use cap_rendering::GpuOutputFormat; - tracing::error!( - frame_number = frame.frame_number, - "RGBA to NV12 CPU conversion failed, sending RGBA frame directly" - ); - return rgba_frame; + if frame.format != GpuOutputFormat::Rgba { + return frame.data; } + tracing::warn!( + frame_number = frame.frame_number, + "GPU NV12 converter returned RGBA - converting to NV12 on CPU" + ); + let width = frame.width; let height = frame.height; - let y_stride = frame.y_stride; + let y_size = (width * height) as usize; + let uv_size = (width * height / 2) as usize; + let mut nv12 = vec![0u8; y_size + uv_size]; + + let mut rgba_buf = vec![0u8; (width * height * 4) as usize]; + let copy_len = frame.data.len().min(rgba_buf.len()); + rgba_buf[..copy_len].copy_from_slice(&frame.data[..copy_len]); + + for y in 0..height as usize { + for x in 0..width as usize { + let rgba_off = (y * width as usize + x) * 4; + let r = rgba_buf[rgba_off] as f32; + let g = rgba_buf[rgba_off + 1] as f32; + let b = rgba_buf[rgba_off + 2] as f32; + nv12[y * width as usize + x] = + (16.0 + 65.481 * r / 255.0 + 128.553 * g / 255.0 + 24.966 * b / 255.0) + .clamp(0.0, 255.0) as u8; + } + } - let mut video_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); - video_frame.set_pts(Some(pts)); + for y in (0..height as usize).step_by(2) { + for x in (0..width as usize).step_by(2) { + let mut r_sum = 0.0f32; + let mut g_sum = 0.0f32; + let mut b_sum = 0.0f32; + for dy in 0..2usize { + for 
dx in 0..2usize { + let py = (y + dy).min(height as usize - 1); + let px = (x + dx).min(width as usize - 1); + let off = (py * width as usize + px) * 4; + r_sum += rgba_buf[off] as f32; + g_sum += rgba_buf[off + 1] as f32; + b_sum += rgba_buf[off + 2] as f32; + } + } + let r = r_sum / (4.0 * 255.0); + let g = g_sum / (4.0 * 255.0); + let b = b_sum / (4.0 * 255.0); + let u = (128.0 - 37.797 * r - 74.203 * g + 112.0 * b).clamp(0.0, 255.0) as u8; + let v = (128.0 + 112.0 * r - 93.786 * g - 18.214 * b).clamp(0.0, 255.0) as u8; + let uv_row = y / 2; + let uv_col = x; + let uv_off = y_size + uv_row * width as usize + uv_col; + nv12[uv_off] = u; + nv12[uv_off + 1] = v; + } + } - let y_plane_size = (y_stride as usize) * (height as usize); - let y_src = &frame.data[..y_plane_size.min(frame.data.len())]; - let uv_src = if y_plane_size < frame.data.len() { - &frame.data[y_plane_size..] + nv12 +} + +fn fill_nv12_frame(frame: &mut ffmpeg::frame::Video, input: &Nv12ExportFrame) { + frame.set_pts(Some(input.pts)); + + let width = input.width as usize; + let height = input.height as usize; + let y_stride = input.y_stride as usize; + + let y_plane_size = y_stride * height; + let y_src = &input.nv12_data[..y_plane_size.min(input.nv12_data.len())]; + let uv_src = if y_plane_size < input.nv12_data.len() { + &input.nv12_data[y_plane_size..] 
} else { &[] }; - let dst_y_stride = video_frame.stride(0); - let dst_uv_stride = video_frame.stride(1); - - if dst_y_stride == y_stride as usize { - let copy_len = y_src.len().min(video_frame.data_mut(0).len()); - video_frame.data_mut(0)[..copy_len].copy_from_slice(&y_src[..copy_len]); + let dst_y_stride = frame.stride(0); + if dst_y_stride == y_stride { + let copy_len = y_src.len().min(frame.data_mut(0).len()); + frame.data_mut(0)[..copy_len].copy_from_slice(&y_src[..copy_len]); } else { - for row in 0..height as usize { - let src_start = row * y_stride as usize; + for row in 0..height { + let src_start = row * y_stride; let dst_start = row * dst_y_stride; - let copy_width = (width as usize).min(y_stride as usize).min(dst_y_stride); + let copy_width = width.min(y_stride).min(dst_y_stride); if src_start + copy_width <= y_src.len() - && dst_start + copy_width <= video_frame.data_mut(0).len() + && dst_start + copy_width <= frame.data_mut(0).len() { - video_frame.data_mut(0)[dst_start..dst_start + copy_width] + frame.data_mut(0)[dst_start..dst_start + copy_width] .copy_from_slice(&y_src[src_start..src_start + copy_width]); } } } - let uv_height = height as usize / 2; - let uv_width = width as usize; - if dst_uv_stride == uv_width { - let copy_len = uv_src.len().min(video_frame.data_mut(1).len()); - video_frame.data_mut(1)[..copy_len].copy_from_slice(&uv_src[..copy_len]); + let uv_height = height / 2; + let dst_uv_stride = frame.stride(1); + if dst_uv_stride == width { + let copy_len = uv_src.len().min(frame.data_mut(1).len()); + frame.data_mut(1)[..copy_len].copy_from_slice(&uv_src[..copy_len]); } else { for row in 0..uv_height { - let src_start = row * uv_width; + let src_start = row * width; let dst_start = row * dst_uv_stride; - let copy_width = uv_width.min(dst_uv_stride); + let copy_width = width.min(dst_uv_stride); if src_start + copy_width <= uv_src.len() - && dst_start + copy_width <= video_frame.data_mut(1).len() + && dst_start + copy_width <= 
frame.data_mut(1).len() { - video_frame.data_mut(1)[dst_start..dst_start + copy_width] + frame.data_mut(1)[dst_start..dst_start + copy_width] .copy_from_slice(&uv_src[src_start..src_start + copy_width]); } } } - - video_frame } fn save_screenshot_from_nv12( From 7732fcdeca01ef1b51946953b242ccda2c547ba1 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:27:10 +0000 Subject: [PATCH 07/27] =?UTF-8?q?improve:=20use=20SIMD-optimized=20swscale?= =?UTF-8?q?=20for=20RGBA=E2=86=92NV12=20CPU=20fallback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace manual pixel-by-pixel RGBA→NV12 conversion with ffmpeg's swscale (SIMD-optimized) in the rare GPU fallback path. This makes the fallback path significantly faster if it ever triggers. Co-authored-by: Richie McIlroy --- crates/export/src/mp4.rs | 93 ++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index b633a24ea43..5b4017340e2 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -640,55 +640,66 @@ fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Vec { let width = frame.width; let height = frame.height; - let y_size = (width * height) as usize; - let uv_size = (width * height / 2) as usize; - let mut nv12 = vec![0u8; y_size + uv_size]; - - let mut rgba_buf = vec![0u8; (width * height * 4) as usize]; - let copy_len = frame.data.len().min(rgba_buf.len()); - rgba_buf[..copy_len].copy_from_slice(&frame.data[..copy_len]); - - for y in 0..height as usize { - for x in 0..width as usize { - let rgba_off = (y * width as usize + x) * 4; - let r = rgba_buf[rgba_off] as f32; - let g = rgba_buf[rgba_off + 1] as f32; - let b = rgba_buf[rgba_off + 2] as f32; - nv12[y * width as usize + x] = - (16.0 + 65.481 * r / 255.0 + 128.553 * g / 255.0 + 24.966 * b / 255.0) - .clamp(0.0, 255.0) as u8; + + let mut rgba_frame = 
ffmpeg::frame::Video::new(ffmpeg::format::Pixel::RGBA, width, height); + let stride = rgba_frame.stride(0); + let src_stride = frame.y_stride as usize; + for row in 0..height as usize { + let src_start = row * src_stride; + let dst_start = row * stride; + let copy_width = (width as usize * 4).min(stride).min(src_stride); + if src_start + copy_width <= frame.data.len() + && dst_start + copy_width <= rgba_frame.data_mut(0).len() + { + rgba_frame.data_mut(0)[dst_start..dst_start + copy_width] + .copy_from_slice(&frame.data[src_start..src_start + copy_width]); } } - for y in (0..height as usize).step_by(2) { - for x in (0..width as usize).step_by(2) { - let mut r_sum = 0.0f32; - let mut g_sum = 0.0f32; - let mut b_sum = 0.0f32; - for dy in 0..2usize { - for dx in 0..2usize { - let py = (y + dy).min(height as usize - 1); - let px = (x + dx).min(width as usize - 1); - let off = (py * width as usize + px) * 4; - r_sum += rgba_buf[off] as f32; - g_sum += rgba_buf[off + 1] as f32; - b_sum += rgba_buf[off + 2] as f32; + if let Ok(mut converter) = ffmpeg::software::scaling::Context::get( + ffmpeg::format::Pixel::RGBA, + width, + height, + ffmpeg::format::Pixel::NV12, + width, + height, + ffmpeg::software::scaling::flag::Flags::FAST_BILINEAR, + ) { + let mut nv12_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + if converter.run(&rgba_frame, &mut nv12_frame).is_ok() { + let y_size = nv12_frame.stride(0) * height as usize; + let uv_size = nv12_frame.stride(1) * (height as usize / 2); + let y_data = &nv12_frame.data(0)[..y_size]; + let uv_data = &nv12_frame.data(1)[..uv_size]; + let mut result = Vec::with_capacity(width as usize * height as usize * 3 / 2); + + if nv12_frame.stride(0) == width as usize { + result.extend_from_slice(y_data); + } else { + for row in 0..height as usize { + let start = row * nv12_frame.stride(0); + result.extend_from_slice(&y_data[start..start + width as usize]); } } - let r = r_sum / (4.0 * 255.0); - let g = g_sum / 
(4.0 * 255.0); - let b = b_sum / (4.0 * 255.0); - let u = (128.0 - 37.797 * r - 74.203 * g + 112.0 * b).clamp(0.0, 255.0) as u8; - let v = (128.0 + 112.0 * r - 93.786 * g - 18.214 * b).clamp(0.0, 255.0) as u8; - let uv_row = y / 2; - let uv_col = x; - let uv_off = y_size + uv_row * width as usize + uv_col; - nv12[uv_off] = u; - nv12[uv_off + 1] = v; + + if nv12_frame.stride(1) == width as usize { + result.extend_from_slice(uv_data); + } else { + for row in 0..(height as usize / 2) { + let start = row * nv12_frame.stride(1); + result.extend_from_slice(&uv_data[start..start + width as usize]); + } + } + + return result; } } - nv12 + tracing::error!( + frame_number = frame.frame_number, + "swscale RGBA to NV12 conversion failed, using zeroed NV12" + ); + vec![0u8; width as usize * height as usize * 3 / 2] } fn fill_nv12_frame(frame: &mut ffmpeg::frame::Video, input: &Nv12ExportFrame) { From 182d5ec4ee21b1f1dd3fe6dff741a0c4dcf5e516 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:30:11 +0000 Subject: [PATCH 08/27] fix: ensure screenshot always receives NV12 data Move ensure_nv12_data() before first_frame_data capture so the screenshot data is always in NV12 format. Previously, if the GPU NV12 converter fell back to RGBA, the screenshot would receive RGBA data but interpret it as NV12, producing a corrupted image. 
Co-authored-by: Richie McIlroy --- crates/export/src/mp4.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index 5b4017340e2..8236737a7ef 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -246,12 +246,14 @@ impl Mp4ExportSettings { return Err("Export cancelled".to_string()); } + let nv12_data = ensure_nv12_data(frame); + if frame_count == 0 { first_frame_data = Some(FirstFrameNv12 { - data: frame.data.clone(), - width: frame.width, - height: frame.height, - y_stride: frame.y_stride, + data: nv12_data.clone(), + width: output_size.0, + height: output_size.1, + y_stride: output_size.0, }); if let Some(audio) = &mut audio_renderer { audio.set_playhead(0.0, &project); @@ -273,8 +275,6 @@ impl Mp4ExportSettings { }) }); - let nv12_data = ensure_nv12_data(frame); - if frame_tx .send(Nv12ExportFrame { audio: audio_frame, From 16b1705dd79cca3120f4b2b04ed15bcfaf6bc0bb Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:33:06 +0000 Subject: [PATCH 09/27] test: add unit tests for NV12 export path helpers - fill_nv12_frame_preserves_data_layout: verifies Y and UV plane data is correctly copied into ffmpeg frame with proper stride handling - ensure_nv12_data_passthrough_for_nv12_format: verifies NV12 data passes through without conversion when format is already NV12 - nv12_export_frame_dimensions_match: validates NV12 size calculations and confirms 62.5% data reduction vs RGBA at 1080p Co-authored-by: Richie McIlroy --- crates/export/src/mp4.rs | 92 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index 8236737a7ef..dabdfdae326 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -820,4 +820,96 @@ mod tests { assert_eq!(sum_samples(sample_rate, fps, frames), expected); } } + + #[test] + fn fill_nv12_frame_preserves_data_layout() { + 
ffmpeg::init().unwrap(); + + let width = 8u32; + let height = 4u32; + let y_size = (width * height) as usize; + let uv_size = (width * height / 2) as usize; + + let mut nv12_data = vec![0u8; y_size + uv_size]; + for i in 0..y_size { + nv12_data[i] = (i % 256) as u8; + } + for i in 0..uv_size { + nv12_data[y_size + i] = (128 + i % 128) as u8; + } + + let input = Nv12ExportFrame { + nv12_data: nv12_data.clone(), + width, + height, + y_stride: width, + pts: 42, + audio: None, + }; + + let mut frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + fill_nv12_frame(&mut frame, &input); + + assert_eq!(frame.pts(), Some(42)); + + for row in 0..height as usize { + for col in 0..width as usize { + let src_val = nv12_data[row * width as usize + col]; + let dst_val = frame.data(0)[row * frame.stride(0) + col]; + assert_eq!(src_val, dst_val, "Y mismatch at ({col}, {row})"); + } + } + + for row in 0..(height / 2) as usize { + for col in 0..width as usize { + let src_val = nv12_data[y_size + row * width as usize + col]; + let dst_val = frame.data(1)[row * frame.stride(1) + col]; + assert_eq!(src_val, dst_val, "UV mismatch at ({col}, {row})"); + } + } + } + + #[test] + fn ensure_nv12_data_passthrough_for_nv12_format() { + use cap_rendering::{GpuOutputFormat, Nv12RenderedFrame}; + + let data = vec![1u8, 2, 3, 4, 5, 6]; + let frame = Nv12RenderedFrame { + data: data.clone(), + width: 4, + height: 2, + y_stride: 4, + frame_number: 0, + target_time_ns: 0, + format: GpuOutputFormat::Nv12, + }; + + let result = ensure_nv12_data(frame); + assert_eq!(result, data); + } + + #[test] + fn nv12_export_frame_dimensions_match() { + let width = 1920u32; + let height = 1080u32; + assert!( + width.is_multiple_of(4), + "1920 should be NV12-compatible (divisible by 4)" + ); + assert!( + height.is_multiple_of(2), + "1080 should be NV12-compatible (divisible by 2)" + ); + + let nv12_size = width as usize * height as usize * 3 / 2; + assert_eq!(nv12_size, 3_110_400); + let 
rgba_size = width as usize * height as usize * 4; + assert_eq!(rgba_size, 8_294_400); + + let savings_pct = (1.0 - nv12_size as f64 / rgba_size as f64) * 100.0; + assert!( + savings_pct > 62.0 && savings_pct < 63.0, + "NV12 should save ~62.5% vs RGBA, got {savings_pct:.1}%" + ); + } } From 05fef9fe47fa518aaab10f61d34fb2e55298c95c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 15 Feb 2026 01:43:27 +0000 Subject: [PATCH 10/27] refactor: extract shared decode_segment_frames_with_retry helper Deduplicate the frame decode-with-retry logic that was duplicated between render_video_to_channel (RGBA) and render_video_to_channel_nv12. Both functions now call the shared decode_segment_frames_with_retry() helper, reducing ~100 lines of duplicated decode/retry/backoff code to a single source of truth. Co-authored-by: Richie McIlroy --- crates/rendering/src/lib.rs | 170 +++++++++++++++--------------------- 1 file changed, 72 insertions(+), 98 deletions(-) diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index f04015b247a..67cb34ccadb 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -380,55 +380,17 @@ pub async fn render_video_to_channel( }; let render_segment = &render_segments[segment.recording_clip as usize]; - - let mut segment_frames = None; - let mut retry_count = 0; - const MAX_RETRIES: u32 = 5; let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); - while segment_frames.is_none() && retry_count < MAX_RETRIES { - if retry_count > 0 { - let delay = if is_initial_frame { - 500 * (retry_count as u64 + 1) - } else { - 50 * retry_count as u64 - }; - tokio::time::sleep(std::time::Duration::from_millis(delay)).await; - } - - segment_frames = if is_initial_frame { - render_segment - .decoders - .get_frames_initial( - segment_time as f32, - !project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - ) - .await - } else { - render_segment - .decoders - .get_frames( - 
segment_time as f32, - !project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - ) - .await - }; - - if segment_frames.is_none() { - retry_count += 1; - if retry_count < MAX_RETRIES { - tracing::warn!( - frame_number = current_frame_number, - segment_time = segment_time, - retry_count = retry_count, - is_initial = is_initial_frame, - "Frame decode failed, retrying..." - ); - } - } - } + let segment_frames = decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + !project.camera.hide, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await; let frame = if let Some(segment_frames) = segment_frames { consecutive_failures = 0; @@ -512,7 +474,7 @@ pub async fn render_video_to_channel( frame_number = current_frame_number, segment_time = segment_time, consecutive_failures = consecutive_failures, - max_retries = MAX_RETRIES, + max_retries = DECODE_MAX_RETRIES, "Frame decode failed after retries - using previous frame" ); let mut fallback = last_frame.clone(); @@ -524,7 +486,7 @@ pub async fn render_video_to_channel( tracing::error!( frame_number = current_frame_number, segment_time = segment_time, - max_retries = MAX_RETRIES, + max_retries = DECODE_MAX_RETRIES, "First frame decode failed after retries - cannot continue" ); continue; @@ -642,55 +604,17 @@ pub async fn render_video_to_channel_nv12( }; let render_segment = &render_segments[segment.recording_clip as usize]; - - let mut segment_frames = None; - let mut retry_count = 0; - const MAX_RETRIES: u32 = 5; let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); - while segment_frames.is_none() && retry_count < MAX_RETRIES { - if retry_count > 0 { - let delay = if is_initial_frame { - 500 * (retry_count as u64 + 1) - } else { - 50 * retry_count as u64 - }; - tokio::time::sleep(std::time::Duration::from_millis(delay)).await; - } - - segment_frames = if is_initial_frame { - render_segment - 
.decoders - .get_frames_initial( - segment_time as f32, - !project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - ) - .await - } else { - render_segment - .decoders - .get_frames( - segment_time as f32, - !project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - ) - .await - }; - - if segment_frames.is_none() { - retry_count += 1; - if retry_count < MAX_RETRIES { - tracing::warn!( - frame_number = current_frame_number, - segment_time = segment_time, - retry_count = retry_count, - is_initial = is_initial_frame, - "Frame decode failed, retrying..." - ); - } - } - } + let segment_frames = decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + !project.camera.hide, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await; let frame = if let Some(segment_frames) = segment_frames { consecutive_failures = 0; @@ -774,7 +698,7 @@ pub async fn render_video_to_channel_nv12( frame_number = current_frame_number, segment_time = segment_time, consecutive_failures = consecutive_failures, - max_retries = MAX_RETRIES, + max_retries = DECODE_MAX_RETRIES, "Frame decode failed after retries - using previous NV12 frame" ); let mut fallback = last_frame.clone_metadata_with_data(); @@ -786,7 +710,7 @@ pub async fn render_video_to_channel_nv12( tracing::error!( frame_number = current_frame_number, segment_time = segment_time, - max_retries = MAX_RETRIES, + max_retries = DECODE_MAX_RETRIES, "First frame decode failed after retries - cannot continue" ); continue; @@ -815,6 +739,56 @@ pub async fn render_video_to_channel_nv12( Ok(()) } +const DECODE_MAX_RETRIES: u32 = 5; + +async fn decode_segment_frames_with_retry( + decoders: &RecordingSegmentDecoders, + segment_time: f64, + needs_camera: bool, + offsets: cap_project::ClipOffsets, + current_frame_number: u32, + is_initial_frame: bool, +) -> Option { + let mut result = None; + let mut retry_count = 0u32; + + while 
result.is_none() && retry_count < DECODE_MAX_RETRIES { + if retry_count > 0 { + let delay = if is_initial_frame { + 500 * (retry_count as u64 + 1) + } else { + 50 * retry_count as u64 + }; + tokio::time::sleep(std::time::Duration::from_millis(delay)).await; + } + + result = if is_initial_frame { + decoders + .get_frames_initial(segment_time as f32, needs_camera, offsets) + .await + } else { + decoders + .get_frames(segment_time as f32, needs_camera, offsets) + .await + }; + + if result.is_none() { + retry_count += 1; + if retry_count < DECODE_MAX_RETRIES { + tracing::warn!( + frame_number = current_frame_number, + segment_time = segment_time, + retry_count = retry_count, + is_initial = is_initial_frame, + "Frame decode failed, retrying..." + ); + } + } + } + + result +} + pub fn get_duration( recordings: &ProjectRecordingsMeta, recording_meta: &RecordingMeta, From 7bef34ce2f5113ef475e3582d9268d7a7c37cac1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 11:26:10 +0000 Subject: [PATCH 11/27] clippy --- crates/cap-test/src/config/types.rs | 11 +---- crates/cap-test/src/matrix/runner.rs | 58 ++++++++++++------------- crates/cap-test/src/suites/scenarios.rs | 17 ++++---- crates/cap-test/src/suites/validate.rs | 16 +++---- 4 files changed, 47 insertions(+), 55 deletions(-) diff --git a/crates/cap-test/src/config/types.rs b/crates/cap-test/src/config/types.rs index 41bf81d100f..2708ba4037b 100644 --- a/crates/cap-test/src/config/types.rs +++ b/crates/cap-test/src/config/types.rs @@ -136,21 +136,12 @@ impl Default for CameraConfig { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct AudioConfig { pub microphones: MicrophoneConfig, pub system: SystemAudioConfig, } -impl Default for AudioConfig { - fn default() -> Self { - Self { - microphones: MicrophoneConfig::default(), - system: SystemAudioConfig::default(), - } - } -} - 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct MicrophoneConfig { #[serde(default = "default_sample_rates")] diff --git a/crates/cap-test/src/matrix/runner.rs b/crates/cap-test/src/matrix/runner.rs index 83152fb778e..b90ab5f3858 100644 --- a/crates/cap-test/src/matrix/runner.rs +++ b/crates/cap-test/src/matrix/runner.rs @@ -362,39 +362,39 @@ impl CompatMatrixRunner { let mut blocking_failures = Vec::new(); for result in &matrix_results.results { - if let Some(classification) = classify_test_failure(result) { - if classification.is_blocking() { - blocking_failures.push(BlockingFailure { - test_id: result.test_id.clone(), - test_name: result.name.clone(), - classification, - reason: result - .failure_reason - .clone() - .unwrap_or_else(|| "Unknown failure".to_string()), - reproduction_steps: build_reproduction_steps(result), - }); - } + if let Some(classification) = classify_test_failure(result) + && classification.is_blocking() + { + blocking_failures.push(BlockingFailure { + test_id: result.test_id.clone(), + test_name: result.name.clone(), + classification, + reason: result + .failure_reason + .clone() + .unwrap_or_else(|| "Unknown failure".to_string()), + reproduction_steps: build_reproduction_steps(result), + }); } } for scenario in &scenario_results { - if let Some(classification) = &scenario.failure_classification { - if classification.is_blocking() { - blocking_failures.push(BlockingFailure { - test_id: scenario.scenario_id.clone(), - test_name: scenario.scenario_name.clone(), - classification: *classification, - reason: scenario - .failure_reason - .clone() - .unwrap_or_else(|| "Unknown failure".to_string()), - reproduction_steps: vec![format!( - "Run: cap-test compat-matrix --interactive (scenario: {})", - scenario.scenario_name - )], - }); - } + if let Some(classification) = &scenario.failure_classification + && classification.is_blocking() + { + blocking_failures.push(BlockingFailure { + test_id: scenario.scenario_id.clone(), + test_name: 
scenario.scenario_name.clone(), + classification: *classification, + reason: scenario + .failure_reason + .clone() + .unwrap_or_else(|| "Unknown failure".to_string()), + reproduction_steps: vec![format!( + "Run: cap-test compat-matrix --interactive (scenario: {})", + scenario.scenario_name + )], + }); } } diff --git a/crates/cap-test/src/suites/scenarios.rs b/crates/cap-test/src/suites/scenarios.rs index 65f3db109c0..940e1609355 100644 --- a/crates/cap-test/src/suites/scenarios.rs +++ b/crates/cap-test/src/suites/scenarios.rs @@ -132,6 +132,7 @@ impl ScenarioRunner { } } + #[allow(clippy::too_many_arguments)] async fn run_recording_scenario( &self, description: &str, @@ -492,14 +493,14 @@ fn classify_scenario_result( ); } - if let Some(sync) = &validation.sync_info { - if !sync.in_sync { - return ( - TestStatus::Fail, - Some(format!("A/V drift too high: {:.1}ms", sync.drift_ms)), - Some(FailureClassification::PerformanceBelowThreshold), - ); - } + if let Some(sync) = &validation.sync_info + && !sync.in_sync + { + return ( + TestStatus::Fail, + Some(format!("A/V drift too high: {:.1}ms", sync.drift_ms)), + Some(FailureClassification::PerformanceBelowThreshold), + ); } (TestStatus::Pass, None, None) diff --git a/crates/cap-test/src/suites/validate.rs b/crates/cap-test/src/suites/validate.rs index 6eaddc8ec74..c07b0c74144 100644 --- a/crates/cap-test/src/suites/validate.rs +++ b/crates/cap-test/src/suites/validate.rs @@ -115,14 +115,14 @@ fn validate_dash_display_dirs( } let m3u8_path = dir.join("media_0.m3u8"); - if m3u8_path.exists() { - if let Ok(contents) = std::fs::read_to_string(&m3u8_path) { - for line in contents.lines() { - if let Some(duration_str) = line.strip_prefix("#EXTINF:") { - let dur_str = duration_str.split(',').next().unwrap_or(""); - if let Ok(dur) = dur_str.parse::() { - total_duration += dur; - } + if m3u8_path.exists() + && let Ok(contents) = std::fs::read_to_string(&m3u8_path) + { + for line in contents.lines() { + if let Some(duration_str) = 
line.strip_prefix("#EXTINF:") { + let dur_str = duration_str.split(',').next().unwrap_or(""); + if let Ok(dur) = dur_str.parse::() { + total_duration += dur; } } } From cf4c1709d07577f7c9aa68f74e16e4591f5b863b Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:43:44 +0000 Subject: [PATCH 12/27] perf(video-decode): enable slice threading and increase decode thread count --- crates/video-decode/src/ffmpeg.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crates/video-decode/src/ffmpeg.rs b/crates/video-decode/src/ffmpeg.rs index 53d5f9d4836..b78700796c9 100644 --- a/crates/video-decode/src/ffmpeg.rs +++ b/crates/video-decode/src/ffmpeg.rs @@ -144,30 +144,34 @@ pub fn get_hw_decoder_capabilities() -> &'static HwDecoderCapabilities { fn configure_software_threading(decoder: &mut avcodec::decoder::Video, width: u32, height: u32) { let pixel_count = (width as u64) * (height as u64); + let cpu_count = num_cpus::get(); let thread_count = if pixel_count > 8294400 { 0 } else if pixel_count > 2073600 { - (num_cpus::get() / 2).max(2) as i32 + cpu_count.min(8).max(2) as i32 } else { - 2 + cpu_count.min(6).max(2) as i32 }; + let thread_type = ffmpeg::sys::FF_THREAD_FRAME | ffmpeg::sys::FF_THREAD_SLICE; + unsafe { let codec_ctx = decoder.as_mut_ptr(); if !codec_ctx.is_null() { (*codec_ctx).thread_count = thread_count; - (*codec_ctx).thread_type = ffmpeg::sys::FF_THREAD_FRAME; + (*codec_ctx).thread_type = thread_type; } } info!( - "Software decode configured: {width}x{height}, thread_count={}, thread_type=frame", + "Software decode configured: {width}x{height}, thread_count={}, thread_type=frame+slice, cpus={}", if thread_count == 0 { "auto".to_string() } else { thread_count.to_string() - } + }, + cpu_count ); } From 7aa4f7e5f39b0845366bdabab80847993c1efab1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 
20:43:55 +0000 Subject: [PATCH 13/27] perf(export): remove unnecessary 200ms sleep before export --- apps/desktop/src-tauri/src/export.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/desktop/src-tauri/src/export.rs b/apps/desktop/src-tauri/src/export.rs index cf76f610835..9460dc4183d 100644 --- a/apps/desktop/src-tauri/src/export.rs +++ b/apps/desktop/src-tauri/src/export.rs @@ -111,7 +111,6 @@ pub async fn export_video( let _guard = if let Some(ref ed) = *editor { ed.export_active.store(true, Ordering::Release); tracing::info!("Pausing editor preview during export"); - tokio::time::sleep(std::time::Duration::from_millis(200)).await; Some(ExportActiveGuard(&ed.export_active)) } else { None From a7e73c8da30d6f0dc2ac78048d0116a327c23333 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:00 +0000 Subject: [PATCH 14/27] style(desktop): collapse nested if into let-chain --- apps/desktop/src-tauri/src/frame_ws.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/desktop/src-tauri/src/frame_ws.rs b/apps/desktop/src-tauri/src/frame_ws.rs index 3d959e4a926..0bc07343916 100644 --- a/apps/desktop/src-tauri/src/frame_ws.rs +++ b/apps/desktop/src-tauri/src/frame_ws.rs @@ -138,11 +138,11 @@ pub async fn create_watch_frame_ws( let borrowed = camera_rx.borrow(); borrowed.as_deref().map(pack_ws_frame_ref) }; - if let Some(packed) = packed { - if let Err(e) = socket.send(Message::Binary(packed)).await { - tracing::error!("Failed to send initial frame to socket: {:?}", e); - return; - } + if let Some(packed) = packed + && let Err(e) = socket.send(Message::Binary(packed)).await + { + tracing::error!("Failed to send initial frame to socket: {:?}", e); + return; } } From f89c7803afd822390cc7e9238962ede37c090500 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:07 +0000 Subject: [PATCH 15/27] 
refactor(rendering): wrap frame data in Arc> --- apps/desktop/src-tauri/src/camera_legacy.rs | 2 +- apps/desktop/src-tauri/src/frame_ws.rs | 4 ++-- crates/rendering/src/frame_pipeline.rs | 12 ++++++++---- crates/rendering/src/main.rs | 2 +- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/apps/desktop/src-tauri/src/camera_legacy.rs b/apps/desktop/src-tauri/src/camera_legacy.rs index 012582207eb..096635a56ea 100644 --- a/apps/desktop/src-tauri/src/camera_legacy.rs +++ b/apps/desktop/src-tauri/src/camera_legacy.rs @@ -60,7 +60,7 @@ pub async fn create_camera_preview_ws() -> (Sender, u16, Cance frame_tx_clone .send(WSFrame { - data: frame.data(0).to_vec(), + data: std::sync::Arc::new(frame.data(0).to_vec()), width: frame.width(), height: frame.height(), stride: frame.stride(0) as u32, diff --git a/apps/desktop/src-tauri/src/frame_ws.rs b/apps/desktop/src-tauri/src/frame_ws.rs index 0bc07343916..10bbac06e36 100644 --- a/apps/desktop/src-tauri/src/frame_ws.rs +++ b/apps/desktop/src-tauri/src/frame_ws.rs @@ -73,7 +73,7 @@ pub enum WSFrameFormat { #[derive(Clone)] pub struct WSFrame { - pub data: Vec, + pub data: std::sync::Arc>, pub width: u32, pub height: u32, pub stride: u32, @@ -278,7 +278,7 @@ pub async fn create_frame_ws(frame_tx: broadcast::Sender) -> (u16, Canc match incoming_frame { Ok(frame) => { let packed = pack_frame_data( - frame.data, + std::sync::Arc::try_unwrap(frame.data).unwrap_or_else(|arc| (*arc).clone()), frame.stride, frame.height, frame.width, diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index b1348e93956..d76aab9d9c6 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -300,7 +300,7 @@ impl PendingNv12Readback { let buffer_slice = self.buffer.slice(..); let data = buffer_slice.get_mapped_range(); - let nv12_data = data.to_vec(); + let nv12_data = Arc::new(data.to_vec()); drop(data); self.buffer.unmap(); @@ -327,7 +327,7 @@ pub enum 
GpuOutputFormat { } pub struct Nv12RenderedFrame { - pub data: Vec, + pub data: Arc>, pub width: u32, pub height: u32, pub y_stride: u32, @@ -349,6 +349,10 @@ impl Nv12RenderedFrame { } } + pub fn into_data(self) -> Vec { + Arc::try_unwrap(self.data).unwrap_or_else(|arc| (*arc).clone()) + } + pub fn y_plane(&self) -> &[u8] { let y_size = (self.y_stride as usize) * (self.height as usize); &self.data[..y_size.min(self.data.len())] @@ -434,7 +438,7 @@ impl PendingReadback { (self.frame_number as u64 * 1_000_000_000) / self.frame_rate.max(1) as u64; Ok(RenderedFrame { - data: data_vec, + data: Arc::new(data_vec), padded_bytes_per_row: self.padded_bytes_per_row, width: self.width, height: self.height, @@ -751,7 +755,7 @@ impl RenderSession { #[derive(Clone)] pub struct RenderedFrame { - pub data: Vec, + pub data: Arc>, pub width: u32, pub height: u32, pub padded_bytes_per_row: u32, diff --git a/crates/rendering/src/main.rs b/crates/rendering/src/main.rs index 3fa4d028f65..efbbc39ddf7 100644 --- a/crates/rendering/src/main.rs +++ b/crates/rendering/src/main.rs @@ -250,6 +250,6 @@ fn save_as_jpeg(frame: &RenderedFrame, output_path: &PathBuf) -> Result<()> { fn save_as_raw(frame: &RenderedFrame, output_path: &PathBuf) -> Result<()> { // Save raw RGBA data - std::fs::write(output_path, &frame.data).context("Failed to save raw frame data")?; + std::fs::write(output_path, &*frame.data).context("Failed to save raw frame data")?; Ok(()) } From a4cac0aaacae7d56ba3e3e04ff9257ac62d501d1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:16 +0000 Subject: [PATCH 16/27] perf(rendering): cache NV12 converter bind groups across frames --- crates/rendering/src/frame_pipeline.rs | 69 ++++++++++++++++++-------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index d76aab9d9c6..d34b5d2470c 100644 --- 
a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -17,6 +17,9 @@ pub struct RgbaToNv12Converter { pending: Option, cached_width: u32, cached_height: u32, + cached_bind_groups: Option<[wgpu::BindGroup; 2]>, + cached_texture_view: Option, + cached_texture_ptr: usize, } #[repr(C)] @@ -105,6 +108,9 @@ impl RgbaToNv12Converter { pending: None, cached_width: 0, cached_height: 0, + cached_bind_groups: None, + cached_texture_view: None, + cached_texture_ptr: 0, } } @@ -142,6 +148,9 @@ impl RgbaToNv12Converter { self.current_readback = 0; self.cached_width = width; self.cached_height = height; + self.cached_bind_groups = None; + self.cached_texture_view = None; + self.cached_texture_ptr = 0; } #[allow(clippy::too_many_arguments)] @@ -166,7 +175,8 @@ impl RgbaToNv12Converter { return false; }; - let readback_buffer = match self.readback_buffers[self.current_readback].as_ref() { + let readback_idx = self.current_readback; + let readback_buffer = match self.readback_buffers[readback_idx].as_ref() { Some(b) => b.clone(), None => return false, }; @@ -183,26 +193,43 @@ impl RgbaToNv12Converter { }; queue.write_buffer(&self.params_buffer, 0, bytemuck::cast_slice(&[params])); - let source_view = source_texture.create_view(&Default::default()); + let texture_ptr = source_texture as *const wgpu::Texture as usize; + let needs_rebind = + self.cached_texture_ptr != texture_ptr || self.cached_bind_groups.is_none(); + + if needs_rebind { + let source_view = source_texture.create_view(&Default::default()); + + let make_bind_group = |view: &wgpu::TextureView| { + device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("RGBA to NV12 Bind Group"), + layout: &self.bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: nv12_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: 
self.params_buffer.as_entire_binding(), + }, + ], + }) + }; - let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("RGBA to NV12 Bind Group"), - layout: &self.bind_group_layout, - entries: &[ - wgpu::BindGroupEntry { - binding: 0, - resource: wgpu::BindingResource::TextureView(&source_view), - }, - wgpu::BindGroupEntry { - binding: 1, - resource: nv12_buffer.as_entire_binding(), - }, - wgpu::BindGroupEntry { - binding: 2, - resource: self.params_buffer.as_entire_binding(), - }, - ], - }); + let bg0 = make_bind_group(&source_view); + let bg1 = make_bind_group(&source_view); + + self.cached_texture_view = Some(source_view); + self.cached_bind_groups = Some([bg0, bg1]); + self.cached_texture_ptr = texture_ptr; + } + + let bind_groups = self.cached_bind_groups.as_ref().unwrap(); { let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { @@ -210,7 +237,7 @@ impl RgbaToNv12Converter { ..Default::default() }); pass.set_pipeline(&self.pipeline); - pass.set_bind_group(0, &bind_group, &[]); + pass.set_bind_group(0, &bind_groups[readback_idx], &[]); pass.dispatch_workgroups(width.div_ceil(4 * 8), height.div_ceil(2 * 8), 1); } From 937e30b0dcf44fd81667c8e506ecdca3bce229b0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:20 +0000 Subject: [PATCH 17/27] perf(rendering): pipeline render with decode prefetch and latency hiding --- crates/rendering/src/frame_pipeline.rs | 42 ++-- crates/rendering/src/lib.rs | 291 +++++++++++++++++++------ 2 files changed, 236 insertions(+), 97 deletions(-) diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index d34b5d2470c..110d1967996 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -819,7 +819,7 @@ pub async fn finish_encoder( queue: &wgpu::Queue, uniforms: &ProjectUniforms, encoder: wgpu::CommandEncoder, -) -> Result { +) -> 
Result, RenderingError> { let previous_frame = if let Some(prev) = session.pipelined_readback.take_pending() { Some(prev.wait(device).await?) } else { @@ -838,16 +838,7 @@ pub async fn finish_encoder( .pipelined_readback .submit_readback(device, queue, texture, uniforms, encoder)?; - if let Some(prev_frame) = previous_frame { - return Ok(prev_frame); - } - - let pending = session - .pipelined_readback - .take_pending() - .expect("just submitted a readback"); - - pending.wait(device).await + Ok(previous_frame) } pub async fn finish_encoder_nv12( @@ -857,7 +848,7 @@ pub async fn finish_encoder_nv12( queue: &wgpu::Queue, uniforms: &ProjectUniforms, mut encoder: wgpu::CommandEncoder, -) -> Result { +) -> Result, RenderingError> { let width = uniforms.output_size.0; let height = uniforms.output_size.1; @@ -888,28 +879,21 @@ pub async fn finish_encoder_nv12( queue.submit(std::iter::once(encoder.finish())); nv12_converter.start_readback(); - if let Some(prev_frame) = previous_frame { - return Ok(prev_frame); - } - - let pending = nv12_converter - .take_pending() - .expect("just submitted a conversion"); - pending.wait(device).await + Ok(previous_frame) } else if let Some(prev_frame) = previous_frame { queue.submit(std::iter::once(encoder.finish())); - Ok(prev_frame) + Ok(Some(prev_frame)) } else { let rgba_frame = finish_encoder(session, device, queue, uniforms, encoder).await?; - Ok(Nv12RenderedFrame { - data: rgba_frame.data, - width: rgba_frame.width, - height: rgba_frame.height, - y_stride: rgba_frame.padded_bytes_per_row, - frame_number: rgba_frame.frame_number, - target_time_ns: rgba_frame.target_time_ns, + Ok(rgba_frame.map(|f| Nv12RenderedFrame { + data: f.data, + width: f.width, + height: f.height, + y_stride: f.padded_bytes_per_row, + frame_number: f.frame_number, + target_time_ns: f.target_time_ns, format: GpuOutputFormat::Rgba, - }) + })) } } diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index 67cb34ccadb..00390ffde2a 100644 --- 
a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -354,10 +354,13 @@ pub async fn render_video_to_channel( ); } + let needs_camera = !project.camera.hide; let mut last_successful_frame: Option = None; let mut consecutive_failures = 0u32; const MAX_CONSECUTIVE_FAILURES: u32 = 200; + let mut prefetched_decode: Option<(u32, f64, usize, Option)> = None; + loop { if frame_number >= total_frames { break; @@ -381,21 +384,39 @@ pub async fn render_video_to_channel( let render_segment = &render_segments[segment.recording_clip as usize]; let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); + let segment_clip_index = segment.recording_clip as usize; - let segment_frames = decode_segment_frames_with_retry( - &render_segment.decoders, - segment_time, - !project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - current_frame_number, - is_initial_frame, - ) - .await; + let segment_frames = + if let Some((pf_num, _pf_time, pf_clip, pf_result)) = prefetched_decode.take() { + if pf_num == current_frame_number && pf_clip == segment_clip_index { + pf_result + } else { + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await + } + } else { + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await + }; - let frame = if let Some(segment_frames) = segment_frames { + if let Some(segment_frames) = segment_frames { consecutive_failures = 0; - let zoom_focus_interp = &zoom_focus_interpolators[segment.recording_clip as usize]; + let zoom_focus_interp = &zoom_focus_interpolators[segment_clip_index]; let uniforms = ProjectUniforms::new( constants, @@ -409,20 +430,70 @@ pub async fn render_video_to_channel( zoom_focus_interp, ); - match 
frame_renderer - .render( - segment_frames, - uniforms, - &render_segment.cursor, - &mut layers, - ) - .await - { - Ok(frame) if frame.width > 0 && frame.height > 0 => { + let next_frame_number = frame_number; + let mut next_prefetch_meta: Option<(f64, usize)> = None; + let prefetch_future = if next_frame_number < total_frames { + if let Some((next_seg_time, next_segment)) = + project.get_segment_time(next_frame_number as f64 / fps as f64) + { + let next_clip_index = next_segment.recording_clip as usize; + next_prefetch_meta = Some((next_seg_time, next_clip_index)); + let next_render_segment = &render_segments[next_clip_index]; + let next_clip_config = project + .clips + .iter() + .find(|v| v.index == next_segment.recording_clip); + let next_is_initial = last_successful_frame.is_none(); + + Some(decode_segment_frames_with_retry( + &next_render_segment.decoders, + next_seg_time, + needs_camera, + next_clip_config.map(|v| v.offsets).unwrap_or_default(), + next_frame_number, + next_is_initial, + )) + } else { + None + } + } else { + None + }; + + let render_result = if let Some(prefetch) = prefetch_future { + let (render, decoded) = tokio::join!( + frame_renderer.render( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ), + prefetch + ); + + if let Some((next_seg_time, next_clip_index)) = next_prefetch_meta { + prefetched_decode = + Some((next_frame_number, next_seg_time, next_clip_index, decoded)); + } + + render + } else { + frame_renderer + .render( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ) + .await + }; + + match render_result { + Ok(Some(frame)) if frame.width > 0 && frame.height > 0 => { last_successful_frame = Some(frame.clone()); - frame + sender.send((frame, current_frame_number)).await?; } - Ok(_) => { + Ok(Some(_)) => { tracing::warn!( frame_number = current_frame_number, "Rendered frame has zero dimensions" @@ -432,11 +503,10 @@ pub async fn render_video_to_channel( fallback.frame_number = 
current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback - } else { - continue; + sender.send((fallback, current_frame_number)).await?; } } + Ok(None) => {} Err(e) => { tracing::error!( frame_number = current_frame_number, @@ -448,7 +518,7 @@ pub async fn render_video_to_channel( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback + sender.send((fallback, current_frame_number)).await?; } else { return Err(e); } @@ -481,7 +551,7 @@ pub async fn render_video_to_channel( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback + sender.send((fallback, current_frame_number)).await?; } else { tracing::error!( frame_number = current_frame_number, @@ -491,9 +561,7 @@ pub async fn render_video_to_channel( ); continue; } - }; - - sender.send((frame, current_frame_number)).await?; + } } if let Some(Ok(final_frame)) = frame_renderer.flush_pipeline().await @@ -578,10 +646,14 @@ pub async fn render_video_to_channel_nv12( ); } + let needs_camera = !project.camera.hide; + let mut last_successful_frame: Option = None; let mut consecutive_failures = 0u32; const MAX_CONSECUTIVE_FAILURES: u32 = 200; + let mut prefetched_decode: Option<(u32, f64, usize, Option)> = None; + loop { if frame_number >= total_frames { break; @@ -605,21 +677,39 @@ pub async fn render_video_to_channel_nv12( let render_segment = &render_segments[segment.recording_clip as usize]; let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); + let segment_clip_index = segment.recording_clip as usize; - let segment_frames = decode_segment_frames_with_retry( - &render_segment.decoders, - segment_time, - !project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - current_frame_number, - is_initial_frame, - ) - .await; + let segment_frames = 
+ if let Some((pf_num, _pf_time, pf_clip, pf_result)) = prefetched_decode.take() { + if pf_num == current_frame_number && pf_clip == segment_clip_index { + pf_result + } else { + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await + } + } else { + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await + }; - let frame = if let Some(segment_frames) = segment_frames { + if let Some(segment_frames) = segment_frames { consecutive_failures = 0; - let zoom_focus_interp = &zoom_focus_interpolators[segment.recording_clip as usize]; + let zoom_focus_interp = &zoom_focus_interpolators[segment_clip_index]; let uniforms = ProjectUniforms::new( constants, @@ -633,20 +723,70 @@ pub async fn render_video_to_channel_nv12( zoom_focus_interp, ); - match frame_renderer - .render_nv12( - segment_frames, - uniforms, - &render_segment.cursor, - &mut layers, - ) - .await - { - Ok(frame) if frame.width > 0 && frame.height > 0 => { + let next_frame_number = frame_number; + let mut next_prefetch_meta: Option<(f64, usize)> = None; + let prefetch_future = if next_frame_number < total_frames { + if let Some((next_seg_time, next_segment)) = + project.get_segment_time(next_frame_number as f64 / fps as f64) + { + let next_clip_index = next_segment.recording_clip as usize; + next_prefetch_meta = Some((next_seg_time, next_clip_index)); + let next_render_segment = &render_segments[next_clip_index]; + let next_clip_config = project + .clips + .iter() + .find(|v| v.index == next_segment.recording_clip); + let next_is_initial = last_successful_frame.is_none(); + + Some(decode_segment_frames_with_retry( + &next_render_segment.decoders, + next_seg_time, + needs_camera, + next_clip_config.map(|v| 
v.offsets).unwrap_or_default(), + next_frame_number, + next_is_initial, + )) + } else { + None + } + } else { + None + }; + + let render_result = if let Some(prefetch) = prefetch_future { + let (render, decoded) = tokio::join!( + frame_renderer.render_nv12( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ), + prefetch + ); + + if let Some((next_seg_time, next_clip_index)) = next_prefetch_meta { + prefetched_decode = + Some((next_frame_number, next_seg_time, next_clip_index, decoded)); + } + + render + } else { + frame_renderer + .render_nv12( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ) + .await + }; + + match render_result { + Ok(Some(frame)) if frame.width > 0 && frame.height > 0 => { last_successful_frame = Some(frame.clone_metadata_with_data()); - frame + sender.send((frame, current_frame_number)).await?; } - Ok(_) => { + Ok(Some(_)) => { tracing::warn!( frame_number = current_frame_number, "Rendered NV12 frame has zero dimensions" @@ -656,11 +796,10 @@ pub async fn render_video_to_channel_nv12( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback - } else { - continue; + sender.send((fallback, current_frame_number)).await?; } } + Ok(None) => {} Err(e) => { tracing::error!( frame_number = current_frame_number, @@ -672,7 +811,7 @@ pub async fn render_video_to_channel_nv12( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback + sender.send((fallback, current_frame_number)).await?; } else { return Err(e); } @@ -705,7 +844,7 @@ pub async fn render_video_to_channel_nv12( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback + sender.send((fallback, current_frame_number)).await?; } else { tracing::error!( frame_number = current_frame_number, @@ 
-715,9 +854,7 @@ pub async fn render_video_to_channel_nv12( ); continue; } - }; - - sender.send((frame, current_frame_number)).await?; + } } if let Some(Ok(final_frame)) = frame_renderer.flush_pipeline_nv12().await @@ -1211,7 +1348,7 @@ impl MotionBlurDescriptor { } impl ProjectUniforms { - fn get_crop(options: &RenderOptions, project: &ProjectConfiguration) -> Crop { + pub fn get_crop(options: &RenderOptions, project: &ProjectConfiguration) -> Crop { project.background.crop.as_ref().cloned().unwrap_or(Crop { position: XY { x: 0, y: 0 }, size: XY { @@ -2062,7 +2199,7 @@ impl<'a> FrameRenderer<'a> { uniforms: ProjectUniforms, cursor: &CursorEvents, layers: &mut RendererLayers, - ) -> Result { + ) -> Result, RenderingError> { let mut last_error = None; for attempt in 0..Self::MAX_RENDER_RETRIES { @@ -2101,7 +2238,7 @@ impl<'a> FrameRenderer<'a> { ) .await { - Ok(frame) => return Ok(frame), + Ok(opt_frame) => return Ok(opt_frame), Err(RenderingError::BufferMapWaitingFailed) => { tracing::warn!( frame_number = uniforms.frame_number, @@ -2126,6 +2263,24 @@ impl<'a> FrameRenderer<'a> { Err(last_error.unwrap_or(RenderingError::BufferMapWaitingFailed)) } + pub async fn render_immediate( + &mut self, + segment_frames: DecodedSegmentFrames, + uniforms: ProjectUniforms, + cursor: &CursorEvents, + layers: &mut RendererLayers, + ) -> Result { + if let Some(frame) = self + .render(segment_frames, uniforms, cursor, layers) + .await? 
+ { + return Ok(frame); + } + self.flush_pipeline() + .await + .unwrap_or(Err(RenderingError::BufferMapWaitingFailed)) + } + pub async fn flush_pipeline(&mut self) -> Option> { if let Some(session) = &mut self.session { flush_pending_readback(session, &self.constants.device).await @@ -2148,7 +2303,7 @@ impl<'a> FrameRenderer<'a> { uniforms: ProjectUniforms, cursor: &CursorEvents, layers: &mut RendererLayers, - ) -> Result { + ) -> Result, RenderingError> { let mut last_error = None; for attempt in 0..Self::MAX_RENDER_RETRIES { @@ -2220,7 +2375,7 @@ impl<'a> FrameRenderer<'a> { ) .await { - Ok(frame) => return Ok(frame), + Ok(opt_frame) => return Ok(opt_frame), Err(RenderingError::BufferMapWaitingFailed) => { last_error = Some(RenderingError::BufferMapWaitingFailed); } @@ -2550,7 +2705,7 @@ async fn produce_frame( cursor: &CursorEvents, layers: &mut RendererLayers, session: &mut RenderSession, -) -> Result { +) -> Result, RenderingError> { let mut encoder = constants.device.create_command_encoder( &(wgpu::CommandEncoderDescriptor { label: Some("Render Encoder"), From 0d0cf472a13e9a762b20312eaaf0916a09525a1e Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:25 +0000 Subject: [PATCH 18/27] refactor(desktop): use render_immediate for non-export render calls --- apps/desktop/src-tauri/src/export.rs | 4 ++-- apps/desktop/src-tauri/src/screenshot_editor.rs | 2 +- crates/editor/examples/playback-pipeline-benchmark.rs | 4 ++-- crates/editor/src/editor.rs | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/desktop/src-tauri/src/export.rs b/apps/desktop/src-tauri/src/export.rs index 9460dc4183d..c2ef35807af 100644 --- a/apps/desktop/src-tauri/src/export.rs +++ b/apps/desktop/src-tauri/src/export.rs @@ -366,7 +366,7 @@ pub async fn generate_export_preview( ); let frame = frame_renderer - .render( + .render_immediate( segment_frames, uniforms, &render_segment.cursor, @@ -509,7 
+509,7 @@ pub async fn generate_export_preview_fast( ); let frame = frame_renderer - .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .render_immediate(segment_frames, uniforms, &segment_media.cursor, &mut layers) .await .map_err(|e| format!("Failed to render frame: {e}"))?; diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs b/apps/desktop/src-tauri/src/screenshot_editor.rs index c95624c59b6..dfaa09d7f74 100644 --- a/apps/desktop/src-tauri/src/screenshot_editor.rs +++ b/apps/desktop/src-tauri/src/screenshot_editor.rs @@ -371,7 +371,7 @@ impl ScreenshotEditorInstances { ); let rendered_frame = frame_renderer - .render( + .render_immediate( segment_frames, uniforms, &cap_project::CursorEvents::default(), diff --git a/crates/editor/examples/playback-pipeline-benchmark.rs b/crates/editor/examples/playback-pipeline-benchmark.rs index 8e04f349d87..87d824e1019 100644 --- a/crates/editor/examples/playback-pipeline-benchmark.rs +++ b/crates/editor/examples/playback-pipeline-benchmark.rs @@ -374,7 +374,7 @@ async fn run_full_pipeline_benchmark( let render_start = Instant::now(); match frame_renderer - .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .render_immediate(segment_frames, uniforms, &segment_media.cursor, &mut layers) .await { Ok(_frame) => { @@ -536,7 +536,7 @@ async fn run_scrubbing_benchmark( let render_start = Instant::now(); match frame_renderer - .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .render_immediate(segment_frames, uniforms, &segment_media.cursor, &mut layers) .await { Ok(_frame) => { diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index d3f5d911cd7..f378538a08f 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -148,7 +148,7 @@ impl Renderer { } } match frame_renderer - .render( + .render_immediate( current.segment_frames, current.uniforms, ¤t.cursor, From f078f5cd98d2df12c3101e6a71972109946a3462 Mon Sep 17 00:00:00 
2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:34 +0000 Subject: [PATCH 19/27] perf(export): always use NV12 GPU path with dimension alignment --- crates/export/src/mp4.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index dabdfdae326..f2c37dc6608 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -58,29 +58,30 @@ impl Mp4ExportSettings { let fps = self.fps; - let output_size = ProjectUniforms::get_output_size( + let raw_output_size = ProjectUniforms::get_output_size( &base.render_constants.options, &base.project_config, self.resolution_base, ); - let nv12_compatible = output_size.0.is_multiple_of(4) && output_size.1.is_multiple_of(2); + let output_size = ((raw_output_size.0 + 3) & !3, (raw_output_size.1 + 1) & !1); - if nv12_compatible { + if output_size != raw_output_size { info!( - width = output_size.0, - height = output_size.1, - "Using GPU NV12 export path (reduced readback + no CPU swscale)" + raw_width = raw_output_size.0, + raw_height = raw_output_size.1, + aligned_width = output_size.0, + aligned_height = output_size.1, + "Aligned output dimensions for NV12 GPU path" ); - self.export_nv12(base, output_size, fps, on_progress).await - } else { - info!( - width = output_size.0, - height = output_size.1, - "Falling back to RGBA export path (dimensions not NV12-compatible)" - ); - self.export_rgba(base, output_size, fps, on_progress).await } + + info!( + width = output_size.0, + height = output_size.1, + "Using GPU NV12 export path (reduced readback + no CPU swscale)" + ); + self.export_nv12(base, output_size, fps, on_progress).await } async fn export_nv12( From 981d6d7b48fca7e9c988115b00e868de6e505924 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:40 +0000 Subject: [PATCH 20/27] perf(export): increase 
pipeline channel buffer sizes to 32 --- crates/export/src/mp4.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index f2c37dc6608..d6b3f633974 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -95,8 +95,8 @@ impl Mp4ExportSettings { let meta = &base.studio_meta; let (tx_image_data, mut video_rx) = - tokio::sync::mpsc::channel::<(Nv12RenderedFrame, u32)>(16); - let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16); + tokio::sync::mpsc::channel::<(Nv12RenderedFrame, u32)>(32); + let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(32); let mut video_info = VideoInfo::from_raw(RawVideoFormat::Nv12, output_size.0, output_size.1, fps); @@ -358,8 +358,8 @@ impl Mp4ExportSettings { let meta = &base.studio_meta; let (tx_image_data, mut video_rx) = - tokio::sync::mpsc::channel::<(cap_rendering::RenderedFrame, u32)>(16); - let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16); + tokio::sync::mpsc::channel::<(cap_rendering::RenderedFrame, u32)>(32); + let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(32); let mut video_info = VideoInfo::from_raw(RawVideoFormat::Rgba, output_size.0, output_size.1, fps); From acdc1be8f3d466d3431a0a2170fde4dfb0e1b7e3 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:44:43 +0000 Subject: [PATCH 21/27] refactor(export): use into_data for Arc frame extraction --- crates/export/src/mp4.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index d6b3f633974..a578103b3ad 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -631,7 +631,7 @@ fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Vec { use cap_rendering::GpuOutputFormat; if frame.format != GpuOutputFormat::Rgba { - return frame.data; + return frame.into_data(); } tracing::warn!( From 
e9e01edd3c9c64b03c028e05f244559dd1f6fa36 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:49:19 +0000 Subject: [PATCH 22/27] clippy --- crates/video-decode/src/ffmpeg.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/video-decode/src/ffmpeg.rs b/crates/video-decode/src/ffmpeg.rs index b78700796c9..8311f57dff0 100644 --- a/crates/video-decode/src/ffmpeg.rs +++ b/crates/video-decode/src/ffmpeg.rs @@ -149,9 +149,9 @@ fn configure_software_threading(decoder: &mut avcodec::decoder::Video, width: u3 let thread_count = if pixel_count > 8294400 { 0 } else if pixel_count > 2073600 { - cpu_count.min(8).max(2) as i32 + cpu_count.clamp(2, 8) as i32 } else { - cpu_count.min(6).max(2) as i32 + cpu_count.clamp(2, 6) as i32 }; let thread_type = ffmpeg::sys::FF_THREAD_FRAME | ffmpeg::sys::FF_THREAD_SLICE; From 5db251410f6fc8f591b9b9e70277d08e095ceb08 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 20:58:48 +0000 Subject: [PATCH 23/27] clippy --- crates/export/src/mp4.rs | 263 --------------------------------------- 1 file changed, 263 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index a578103b3ad..9af7f85a074 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -346,269 +346,6 @@ impl Mp4ExportSettings { Ok(output_path) } - - async fn export_rgba( - self, - base: ExporterBase, - output_size: (u32, u32), - fps: u32, - mut on_progress: impl FnMut(u32) -> bool + Send + 'static, - ) -> Result { - let output_path = base.output_path.clone(); - let meta = &base.studio_meta; - - let (tx_image_data, mut video_rx) = - tokio::sync::mpsc::channel::<(cap_rendering::RenderedFrame, u32)>(32); - let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(32); - - let mut video_info = - VideoInfo::from_raw(RawVideoFormat::Rgba, output_size.0, output_size.1, fps); - 
video_info.time_base = ffmpeg::Rational::new(1, fps as i32); - - let audio_segments = get_audio_segments(&base.segments); - - let mut audio_renderer = audio_segments - .first() - .filter(|_| !base.project_config.audio.mute) - .map(|_| AudioRenderer::new(audio_segments.clone())); - let has_audio = audio_renderer.is_some(); - - let encoder_thread = tokio::task::spawn_blocking(move || { - trace!("Creating MP4File encoder (RGBA fallback)"); - - let mut encoder = MP4File::init( - "output", - base.output_path.clone(), - |o| { - H264Encoder::builder(video_info) - .with_bpp(self.effective_bpp()) - .with_export_priority() - .with_export_settings() - .build(o) - }, - |o| { - has_audio.then(|| { - AACEncoder::init(AudioRenderer::info(), o) - .map(|v| v.boxed()) - .map_err(Into::into) - }) - }, - ) - .map_err(|v| v.to_string())?; - - info!("Created MP4File encoder (RGBA fallback, export settings)"); - - let mut encoded_frames = 0u32; - let encode_start = std::time::Instant::now(); - - while let Ok(frame) = frame_rx.recv() { - encoder - .queue_video_frame(frame.video, Duration::MAX) - .map_err(|err| err.to_string())?; - if let Some(audio) = frame.audio { - encoder.queue_audio_frame(audio); - } - encoded_frames += 1; - } - - let encode_elapsed = encode_start.elapsed(); - if encoded_frames > 0 { - let encode_fps = encoded_frames as f64 / encode_elapsed.as_secs_f64().max(0.001); - info!( - encoded_frames = encoded_frames, - elapsed_secs = format!("{:.2}", encode_elapsed.as_secs_f64()), - encode_fps = format!("{:.1}", encode_fps), - "Encoder thread finished (RGBA)" - ); - } - - let res = encoder - .finish() - .map_err(|e| format!("Failed to finish encoding: {e}"))?; - - if let Err(e) = res.video_finish { - return Err(format!("Video encoding failed: {e}")); - } - if let Err(e) = res.audio_finish { - return Err(format!("Audio encoding failed: {e}")); - } - - Ok::<_, String>(base.output_path) - }) - .then(|r| async { r.map_err(|e| e.to_string()).and_then(|v| v) }); - - let render_task 
= tokio::spawn({ - let project = base.project_config.clone(); - let project_path = base.project_path.clone(); - async move { - let mut frame_count = 0; - let mut first_frame = None; - let sample_rate = u64::from(AudioRenderer::SAMPLE_RATE); - let fps_u64 = u64::from(fps); - let mut audio_sample_cursor = 0u64; - - let mut consecutive_timeouts = 0u32; - const MAX_CONSECUTIVE_TIMEOUTS: u32 = 3; - - loop { - let timeout_secs = if frame_count == 0 { 120 } else { 90 }; - let (frame, frame_number) = match tokio::time::timeout( - Duration::from_secs(timeout_secs), - video_rx.recv(), - ) - .await - { - Err(_) => { - consecutive_timeouts += 1; - - if consecutive_timeouts >= MAX_CONSECUTIVE_TIMEOUTS { - tracing::error!( - frame_count = frame_count, - timeout_secs = timeout_secs, - consecutive_timeouts = consecutive_timeouts, - "Export render_task timed out {} consecutive times - aborting", - MAX_CONSECUTIVE_TIMEOUTS - ); - return Err(format!( - "Export timed out {MAX_CONSECUTIVE_TIMEOUTS} times consecutively after {timeout_secs}s each waiting for frame {frame_count} - GPU/decoder may be unresponsive" - )); - } - - tracing::warn!( - frame_count = frame_count, - timeout_secs = timeout_secs, - consecutive_timeouts = consecutive_timeouts, - "Frame receive timed out, waiting for next frame..." 
- ); - continue; - } - Ok(Some(v)) => { - consecutive_timeouts = 0; - v - } - Ok(None) => { - tracing::debug!( - frame_count = frame_count, - "Render channel closed - rendering complete" - ); - break; - } - }; - - if !(on_progress)(frame_count) { - return Err("Export cancelled".to_string()); - } - - if frame_count == 0 { - first_frame = Some(frame.clone()); - if let Some(audio) = &mut audio_renderer { - audio.set_playhead(0.0, &project); - } - } - - let audio_frame = audio_renderer.as_mut().and_then(|audio| { - let n = u64::from(frame_number); - let end = ((n + 1) * sample_rate) / fps_u64; - if end <= audio_sample_cursor { - return None; - } - let pts = audio_sample_cursor as i64; - let samples = (end - audio_sample_cursor) as usize; - audio_sample_cursor = end; - audio.render_frame(samples, &project).map(|mut frame| { - frame.set_pts(Some(pts)); - frame - }) - }); - - if frame_tx - .send(MP4Input { - audio: audio_frame, - video: video_info.wrap_frame( - &frame.data, - frame_number as i64, - frame.padded_bytes_per_row as usize, - ), - }) - .is_err() - { - warn!("Renderer task sender dropped. 
Exiting"); - return Ok(()); - } - - frame_count += 1; - } - - drop(frame_tx); - - if let Some(frame) = first_frame { - let project_path = project_path.clone(); - let screenshot_task = tokio::task::spawn_blocking(move || { - let rgb_img = ImageBuffer::, Vec>::from_raw( - frame.width, - frame.height, - frame - .data - .chunks(frame.padded_bytes_per_row as usize) - .flat_map(|row| { - row[0..(frame.width * 4) as usize] - .chunks(4) - .flat_map(|chunk| [chunk[0], chunk[1], chunk[2]]) - }) - .collect::>(), - ); - - let Some(rgb_img) = rgb_img else { - return; - }; - - let screenshots_dir = project_path.join("screenshots"); - if std::fs::create_dir_all(&screenshots_dir).is_err() { - return; - } - - let screenshot_path = screenshots_dir.join("display.jpg"); - let _ = rgb_img.save(&screenshot_path); - }); - - if let Err(e) = screenshot_task.await { - warn!("Screenshot task failed: {e}"); - } - } else { - warn!("No frames were processed, cannot save screenshot or thumbnail"); - } - - Ok::<_, String>(()) - } - }) - .then(|r| async { - r.map_err(|e| e.to_string()) - .and_then(|v| v.map_err(|e| e.to_string())) - }); - - let render_video_task = cap_rendering::render_video_to_channel( - &base.render_constants, - &base.project_config, - tx_image_data, - &base.recording_meta, - meta, - base.segments - .iter() - .map(|s| RenderSegment { - cursor: s.cursor.clone(), - decoders: s.decoders.clone(), - }) - .collect(), - fps, - self.resolution_base, - &base.recordings, - ) - .then(|v| async { v.map_err(|e| e.to_string()) }); - - tokio::try_join!(encoder_thread, render_video_task, render_task)?; - - Ok(output_path) - } } struct FirstFrameNv12 { From 1fe56b34d5a76f0691510a71bfb40b35010226bc Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 22:17:58 +0000 Subject: [PATCH 24/27] clippy --- apps/desktop/src-tauri/src/camera.rs | 54 +------- apps/desktop/src-tauri/src/panel_manager.rs | 121 +----------------- 
apps/desktop/src-tauri/src/windows.rs | 5 - .../src/sources/screen_capture/windows.rs | 97 +++++++------- 4 files changed, 50 insertions(+), 227 deletions(-) diff --git a/apps/desktop/src-tauri/src/camera.rs b/apps/desktop/src-tauri/src/camera.rs index be5c20113fc..3c5ae5d4ee0 100644 --- a/apps/desktop/src-tauri/src/camera.rs +++ b/apps/desktop/src-tauri/src/camera.rs @@ -151,50 +151,6 @@ impl CameraPreviewManager { } } - // Resumes a paused camera preview. Uses window.show() which is safe, unlike - // panel.order_front_regardless() which causes crashes after repeated use. - pub fn resume(&mut self, window: &WebviewWindow) { - if let Some(preview) = &mut self.preview - && preview.is_paused - { - preview.is_paused = false; - preview - .reconfigure - .send(ReconfigureEvent::Resume) - .map_err(|err| error!("Error sending camera preview resume event: {err}")) - .ok(); - window - .run_on_main_thread({ - let window = window.clone(); - move || { - let _ = window.show(); - } - }) - .ok(); - } - } - - pub fn is_paused(&self) -> bool { - self.preview.as_ref().is_some_and(|p| p.is_paused) - } - - pub fn begin_shutdown_for_session( - &mut self, - expected_session_id: u64, - ) -> Option> { - if let Some(preview) = &self.preview - && preview.session_id != expected_session_id - { - info!( - "Skipping camera preview close: session mismatch (expected {}, current {})", - expected_session_id, preview.session_id - ); - return None; - } - - self.begin_shutdown() - } - pub fn begin_shutdown(&mut self) -> Option> { let preview = self.preview.take()?; info!( @@ -304,14 +260,6 @@ impl CameraPreviewManager { Ok(()) } - - pub fn on_window_close_for_session(&mut self, expected_session_id: u64) { - let _ = self.begin_shutdown_for_session(expected_session_id); - } - - pub fn on_window_close(&mut self) { - let _ = self.begin_shutdown(); - } } // Internal events for the persistent camera renderer architecture. 
@@ -874,7 +822,7 @@ impl Renderer { let _ = self.device.poll(wgpu::PollType::Wait); drop(std::mem::take(&mut self.texture)); - drop(std::mem::take(&mut self.aspect_ratio)); + self.aspect_ratio = Cached::default(); let surface = self.surface.take(); let (drop_tx, drop_rx) = oneshot::channel(); diff --git a/apps/desktop/src-tauri/src/panel_manager.rs b/apps/desktop/src-tauri/src/panel_manager.rs index 78de6267ab0..7d63e350c1d 100644 --- a/apps/desktop/src-tauri/src/panel_manager.rs +++ b/apps/desktop/src-tauri/src/panel_manager.rs @@ -6,28 +6,19 @@ use tokio::sync::RwLock; use tracing::{debug, info, trace, warn}; #[cfg(target_os = "macos")] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub enum PanelState { + #[default] None, Creating, Ready, Destroying, } -#[cfg(target_os = "macos")] -impl Default for PanelState { - fn default() -> Self { - Self::None - } -} - #[cfg(target_os = "macos")] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum PanelWindowType { Camera, - Main, - TargetSelectOverlay, - InProgressRecording, } #[cfg(target_os = "macos")] @@ -35,9 +26,6 @@ impl std::fmt::Display for PanelWindowType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Camera => write!(f, "Camera"), - Self::Main => write!(f, "Main"), - Self::TargetSelectOverlay => write!(f, "TargetSelectOverlay"), - Self::InProgressRecording => write!(f, "InProgressRecording"), } } } @@ -107,7 +95,6 @@ impl PanelManager { window_type, op_id ); Some(PanelOperationGuard { - window_type, operation_id: op_id, completed: false, }) @@ -136,87 +123,6 @@ impl PanelManager { } } - pub async fn try_begin_show( - &self, - window_type: PanelWindowType, - ) -> Option { - let mut panels = self.panels.write().await; - let entry = panels.entry(window_type).or_default(); - - match entry.state { - PanelState::Ready => { - let op_id = self - .operation_counter - .fetch_add(1, std::sync::atomic::Ordering::SeqCst); 
- debug!( - "Panel {}: beginning show operation (op_id={})", - window_type, op_id - ); - Some(PanelOperationGuard { - window_type, - operation_id: op_id, - completed: true, - }) - } - PanelState::None => { - debug!("Panel {}: show blocked - window doesn't exist", window_type); - None - } - PanelState::Creating => { - debug!( - "Panel {}: show blocked - currently creating (op_id={})", - window_type, entry.operation_id - ); - None - } - PanelState::Destroying => { - debug!( - "Panel {}: show blocked - currently destroying (op_id={})", - window_type, entry.operation_id - ); - None - } - } - } - - pub async fn try_begin_destroy( - &self, - window_type: PanelWindowType, - ) -> Option { - let mut panels = self.panels.write().await; - let entry = panels.entry(window_type).or_default(); - - match entry.state { - PanelState::Ready | PanelState::Creating => { - let op_id = self - .operation_counter - .fetch_add(1, std::sync::atomic::Ordering::SeqCst); - entry.state = PanelState::Destroying; - entry.operation_id = op_id; - debug!( - "Panel {}: beginning destroy operation (op_id={})", - window_type, op_id - ); - Some(PanelOperationGuard { - window_type, - operation_id: op_id, - completed: false, - }) - } - PanelState::None => { - debug!("Panel {}: destroy skipped - already destroyed", window_type); - None - } - PanelState::Destroying => { - debug!( - "Panel {}: destroy blocked - already destroying (op_id={})", - window_type, entry.operation_id - ); - None - } - } - } - pub async fn mark_ready(&self, window_type: PanelWindowType, operation_id: u64) { let mut panels = self.panels.write().await; if let Some(entry) = panels.get_mut(&window_type) { @@ -235,24 +141,6 @@ impl PanelManager { } } - pub async fn mark_destroyed(&self, window_type: PanelWindowType, operation_id: u64) { - let mut panels = self.panels.write().await; - if let Some(entry) = panels.get_mut(&window_type) { - if entry.operation_id == operation_id && entry.state == PanelState::Destroying { - entry.state = 
PanelState::None; - info!( - "Panel {}: marked destroyed (op_id={})", - window_type, operation_id - ); - } else { - warn!( - "Panel {}: mark_destroyed ignored - state mismatch (current state={:?}, current op={}, requested op={})", - window_type, entry.state, entry.operation_id, operation_id - ); - } - } - } - pub async fn force_reset(&self, window_type: PanelWindowType) { let mut panels = self.panels.write().await; if let Some(entry) = panels.get_mut(&window_type) { @@ -293,7 +181,6 @@ impl PanelManager { #[cfg(target_os = "macos")] pub struct PanelOperationGuard { - pub window_type: PanelWindowType, pub operation_id: u64, completed: bool, } @@ -303,10 +190,6 @@ impl PanelOperationGuard { pub fn mark_completed(&mut self) { self.completed = true; } - - pub fn is_completed(&self) -> bool { - self.completed - } } #[cfg(target_os = "macos")] diff --git a/apps/desktop/src-tauri/src/windows.rs b/apps/desktop/src-tauri/src/windows.rs index 6024d4d6cbf..b6ec15f2b16 100644 --- a/apps/desktop/src-tauri/src/windows.rs +++ b/apps/desktop/src-tauri/src/windows.rs @@ -886,11 +886,6 @@ impl ShowCapWindow { None }; - match self { - Self::Main { .. 
} => {} - _ => {} - } - if let Self::Main { init_target_mode: Some(target_mode), } = self diff --git a/crates/recording/src/sources/screen_capture/windows.rs b/crates/recording/src/sources/screen_capture/windows.rs index 6b93dfe988f..5ff6077160e 100644 --- a/crates/recording/src/sources/screen_capture/windows.rs +++ b/crates/recording/src/sources/screen_capture/windows.rs @@ -205,9 +205,10 @@ impl WindowsFrameScaler { let src_width = frame.width(); let src_height = frame.height(); - let needs_reinit = self.state.as_ref().map_or(true, |s| { - s.source_width != src_width || s.source_height != src_height - }); + let needs_reinit = self + .state + .as_ref() + .is_none_or(|s| s.source_width != src_width || s.source_height != src_height); if needs_reinit { let src_pixel = match self.pixel_format { @@ -432,33 +433,42 @@ enum VideoControl { const MAX_CAPTURE_RESTARTS: u32 = 3; const RESTART_DELAY: Duration = Duration::from_secs(1); -fn create_d3d_capturer( - display_id: &DisplayId, - settings: &scap_direct3d::Settings, - d3d_device: &ID3D11Device, - video_tx: &mpsc::Sender, - error_tx: &mpsc::Sender, - video_frame_counter: &Arc, - video_drop_counter: &Arc, +struct CreateCapturerParams<'a> { + display_id: &'a DisplayId, + settings: &'a scap_direct3d::Settings, + d3d_device: &'a ID3D11Device, + video_tx: &'a mpsc::Sender, + video_frame_counter: &'a Arc, + video_drop_counter: &'a Arc, expected_width: u32, expected_height: u32, frame_scaler: Arc>, scaling_logged: Arc, scaled_frame_count: Arc, +} + +fn create_d3d_capturer( + params: &CreateCapturerParams, + error_tx: &mpsc::Sender, ) -> anyhow::Result { - let capture_item = Display::from_id(display_id) - .ok_or_else(|| anyhow!("Display not found for ID: {:?}", display_id))? + let capture_item = Display::from_id(params.display_id) + .ok_or_else(|| anyhow!("Display not found for ID: {:?}", params.display_id))? 
.raw_handle() .try_as_capture_item() .map_err(|e| anyhow!("Failed to create GraphicsCaptureItem: {}", e))?; scap_direct3d::Capturer::new( capture_item, - settings.clone(), + params.settings.clone(), { - let video_frame_counter = video_frame_counter.clone(); - let video_drop_counter = video_drop_counter.clone(); - let mut tx = video_tx.clone(); + let video_frame_counter = params.video_frame_counter.clone(); + let video_drop_counter = params.video_drop_counter.clone(); + let mut tx = params.video_tx.clone(); + let expected_width = params.expected_width; + let expected_height = params.expected_height; + let frame_scaler = params.frame_scaler.clone(); + let scaling_logged = params.scaling_logged.clone(); + let scaled_frame_count = params.scaled_frame_count.clone(); move |frame| { let timestamp = frame.inner().SystemRelativeTime()?; let timestamp = Timestamp::PerformanceCounter(PerformanceCounterTimestamp::new( @@ -535,7 +545,7 @@ fn create_d3d_capturer( Ok(()) } }, - Some(d3d_device.clone()), + Some(params.d3d_device.clone()), ) .map_err(|e| anyhow!("{e}")) } @@ -590,20 +600,21 @@ impl output_pipeline::VideoSource for VideoSource { let cancel_token = CancellationToken::new(); let mut error_tx = error_tx; - let mut capturer = match create_d3d_capturer( - &display_id, - &settings, - &d3d_device, - &video_tx, - &error_tx, - &video_frame_counter, - &video_drop_counter, + let capturer_params = CreateCapturerParams { + display_id: &display_id, + settings: &settings, + d3d_device: &d3d_device, + video_tx: &video_tx, + video_frame_counter: &video_frame_counter, + video_drop_counter: &video_drop_counter, expected_width, expected_height, - frame_scaler.clone(), - scaling_logged.clone(), - scaled_frame_count.clone(), - ) { + frame_scaler: frame_scaler.clone(), + scaling_logged: scaling_logged.clone(), + scaled_frame_count: scaled_frame_count.clone(), + }; + + let mut capturer = match create_d3d_capturer(&capturer_params, &error_tx) { Ok(c) => { trace!("D3D capturer created 
successfully"); Some(c) @@ -701,20 +712,7 @@ impl output_pipeline::VideoSource for VideoSource { drop(old); } - match create_d3d_capturer( - &display_id, - &settings, - &d3d_device, - &video_tx, - &error_tx, - &video_frame_counter, - &video_drop_counter, - expected_width, - expected_height, - frame_scaler.clone(), - scaling_logged.clone(), - scaled_frame_count.clone(), - ) { + match create_d3d_capturer(&capturer_params, &error_tx) { Ok(mut new_cap) => match new_cap.start() { Ok(()) => { let count = restart_counter @@ -1223,12 +1221,11 @@ impl output_pipeline::AudioSource for SystemAudioSource { fn stop(&mut self) -> impl Future> { self.cancel_token.cancel(); - if let Ok(guard) = self.state.lock() { - if let Some(ref capturer) = guard.capturer { - if let Err(err) = capturer.pause() { - warn!("system audio capturer pause failed: {err}"); - } - } + if let Ok(guard) = self.state.lock() + && let Some(ref capturer) = guard.capturer + && let Err(err) = capturer.pause() + { + warn!("system audio capturer pause failed: {err}"); } async { Ok(()) } } From 3f25623adc178cfad6eb7b0a849669d965659f38 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 22:36:17 +0000 Subject: [PATCH 25/27] clippy --- apps/desktop/src-tauri/src/frame_ws.rs | 2 +- crates/export/src/mp4.rs | 50 +++++++++++--------------- crates/rendering/src/frame_pipeline.rs | 2 +- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/apps/desktop/src-tauri/src/frame_ws.rs b/apps/desktop/src-tauri/src/frame_ws.rs index 10bbac06e36..dc16a2bb1b8 100644 --- a/apps/desktop/src-tauri/src/frame_ws.rs +++ b/apps/desktop/src-tauri/src/frame_ws.rs @@ -278,7 +278,7 @@ pub async fn create_frame_ws(frame_tx: broadcast::Sender) -> (u16, Canc match incoming_frame { Ok(frame) => { let packed = pack_frame_data( - std::sync::Arc::try_unwrap(frame.data).unwrap_or_else(|arc| (*arc).clone()), + std::sync::Arc::unwrap_or_clone(frame.data), frame.stride, 
frame.height, frame.width, diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index 9af7f85a074..672eb1915c3 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -8,7 +8,7 @@ use futures::FutureExt; use image::ImageBuffer; use serde::Deserialize; use specta::Type; -use std::{path::PathBuf, time::Duration}; +use std::{path::PathBuf, sync::Arc, time::Duration}; use tracing::{info, trace, warn}; #[derive(Deserialize, Type, Clone, Copy, Debug)] @@ -58,28 +58,16 @@ impl Mp4ExportSettings { let fps = self.fps; - let raw_output_size = ProjectUniforms::get_output_size( + let output_size = ProjectUniforms::get_output_size( &base.render_constants.options, &base.project_config, self.resolution_base, ); - let output_size = ((raw_output_size.0 + 3) & !3, (raw_output_size.1 + 1) & !1); - - if output_size != raw_output_size { - info!( - raw_width = raw_output_size.0, - raw_height = raw_output_size.1, - aligned_width = output_size.0, - aligned_height = output_size.1, - "Aligned output dimensions for NV12 GPU path" - ); - } - info!( width = output_size.0, height = output_size.1, - "Using GPU NV12 export path (reduced readback + no CPU swscale)" + "Exporting with NV12 pipeline (GPU when possible, CPU fallback otherwise)" ); self.export_nv12(base, output_size, fps, on_progress).await } @@ -247,14 +235,16 @@ impl Mp4ExportSettings { return Err("Export cancelled".to_string()); } + let frame_width = frame.width; + let frame_height = frame.height; let nv12_data = ensure_nv12_data(frame); if frame_count == 0 { first_frame_data = Some(FirstFrameNv12 { data: nv12_data.clone(), - width: output_size.0, - height: output_size.1, - y_stride: output_size.0, + width: frame_width, + height: frame_height, + y_stride: frame_width, }); if let Some(audio) = &mut audio_renderer { audio.set_playhead(0.0, &project); @@ -280,9 +270,9 @@ impl Mp4ExportSettings { .send(Nv12ExportFrame { audio: audio_frame, nv12_data, - width: output_size.0, - height: output_size.1, - 
y_stride: output_size.0, + width: frame_width, + height: frame_height, + y_stride: frame_width, pts: frame_number as i64, }) .is_err() @@ -349,14 +339,14 @@ impl Mp4ExportSettings { } struct FirstFrameNv12 { - data: Vec<u8>, + data: Arc<Vec<u8>>, width: u32, height: u32, y_stride: u32, } struct Nv12ExportFrame { - nv12_data: Vec<u8>, + nv12_data: Arc<Vec<u8>>, width: u32, height: u32, y_stride: u32, @@ -364,11 +354,11 @@ struct Nv12ExportFrame { audio: Option, } -fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Vec<u8> { +fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Arc<Vec<u8>> { use cap_rendering::GpuOutputFormat; if frame.format != GpuOutputFormat::Rgba { - return frame.into_data(); + return frame.data; } tracing::warn!( @@ -429,7 +419,7 @@ fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Vec<u8> { } } - return result; + return Arc::new(result); } } @@ -437,7 +427,7 @@ fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Vec<u8> { frame_number = frame.frame_number, "swscale RGBA to NV12 conversion failed, using zeroed NV12" ); - vec![0u8; width as usize * height as usize * 3 / 2] + Arc::new(vec![0u8; width as usize * height as usize * 3 / 2]) } fn fill_nv12_frame(frame: &mut ffmpeg::frame::Video, input: &Nv12ExportFrame) { @@ -577,7 +567,7 @@ mod tests { } let input = Nv12ExportFrame { - nv12_data: nv12_data.clone(), + nv12_data: Arc::new(nv12_data.clone()), width, height, y_stride: width, @@ -613,7 +603,7 @@ mod tests { let data = vec![1u8, 2, 3, 4, 5, 6]; let frame = Nv12RenderedFrame { - data: data.clone(), + data: std::sync::Arc::new(data.clone()), width: 4, height: 2, y_stride: 4, @@ -623,7 +613,7 @@ mod tests { }; let result = ensure_nv12_data(frame); - assert_eq!(result, data); + assert_eq!(*result, data); } #[test] diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index 110d1967996..f8dac07e697 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -377,7 +377,7 @@ impl Nv12RenderedFrame { } pub fn
into_data(self) -> Vec { - Arc::try_unwrap(self.data).unwrap_or_else(|arc| (*arc).clone()) + Arc::unwrap_or_clone(self.data) } pub fn y_plane(&self) -> &[u8] { From a451f83a5b30bb43a078e90c981468609be39d77 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 22:50:04 +0000 Subject: [PATCH 26/27] Fix AVAssetReader unwraps and add session notes --- crates/editor/PLAYBACK-FINDINGS.md | 31 ++++++++++++++++++++++++ crates/recording/FINDINGS.md | 26 ++++++++++++++++++++ crates/video-decode/src/avassetreader.rs | 12 ++++++--- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f219..32d23fca020 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -324,6 +324,37 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu --- +### Session 2026-02-15 (Performance Check + AVAssetReader Fix) + +**Goal**: Run playback benchmarks, fix panics in decoder fallback path + +**What was done**: +1. Ran full playback validation on MP4 and fragmented recordings +2. Identified AVAssetReader panicking with `unwrap()` on directory paths (fragmented recordings) +3. 
Fixed by replacing `unwrap()` with proper error propagation + +**Changes Made**: +- `crates/video-decode/src/avassetreader.rs`: Replaced `ffmpeg::format::input(&path).unwrap()` and `.ok_or(...).unwrap()` with `map_err()?` and `ok_or_else()?` for clean error propagation instead of panics + +**Results** (MP4 Mode): +- ✅ Decoder: AVAssetReader (hardware), display init=114-123ms, camera init=25-33ms +- ✅ Playback: 637-640 fps effective, avg=1.6ms, p95=5.0ms, p99=6.3ms +- ✅ Camera sync: 0ms drift (perfect) +- ✅ Mic sync: 88-100ms (borderline on this run, normally 77-88ms) +- 🟡 System audio: 193-205ms (known issue, inherited from recording) + +**Results** (Fragmented Mode): +- ✅ Decoder: FFmpeg (hardware) with VideoToolbox, display init=100-110ms, camera init=7ms +- ✅ Playback: 153-173 fps effective, avg=5.8-6.5ms, p95=9.0-12.4ms +- ✅ Camera sync: 0ms drift (perfect) +- ✅ Mic sync: 10-23ms (excellent) +- ✅ AVAssetReader now cleanly falls back to FFmpeg without panicking +- 🟡 System audio: 85-116ms (borderline, known issue) + +**Stopping point**: All playback metrics healthy. AVAssetReader panic fixed. No further action needed. + +--- + ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/recording/FINDINGS.md b/crates/recording/FINDINGS.md index bcfd5586a76..0b3d99f8678 100644 --- a/crates/recording/FINDINGS.md +++ b/crates/recording/FINDINGS.md @@ -415,6 +415,32 @@ System Audio ────┘ ├─► MP4 (macos.rs) ─ --- +### Session 2026-02-15 (Performance Check + AVAssetReader Fix) + +**Goal**: Run recording and playback benchmarks, fix any issues + +**What was done**: +1. Ran MP4 baseline benchmarks (cold + warm runs) +2. Ran fragmented baseline benchmark +3. Ran playback benchmark on resulting recordings +4. 
Fixed AVAssetReader panic on directory paths (fragmented recordings) + +**Changes Made**: +- `crates/video-decode/src/avassetreader.rs`: Replaced two `unwrap()` calls with proper error propagation via `?` and `map_err`. Previously panicked when given a directory path (fragmented recordings); now returns clean error that triggers graceful FFmpeg fallback. + +**Results**: +- ✅ MP4: 29.2fps, 10.4-10.7ms jitter, 2.7% dropped, 0ms A/V sync, 81-94ms mic timing +- ✅ Fragmented: 29.5-29.6fps, 4.6-5.9ms jitter, 1.3% dropped, 0ms A/V sync, 1-4ms mic timing +- ✅ Playback MP4: 637fps effective, 1.6ms avg, 5.0ms p95, 0ms camera drift +- ✅ Playback Fragmented: 153fps effective, 6.5ms avg, 12.4ms p95, 0ms camera drift +- ✅ AVAssetReader no longer panics on directory paths +- 🟡 System audio: 120-246ms (known lower-priority issue) +- 🟡 MP4 dropped frames at 2.7% (single 160ms spike from encoder warmup, not actionable) + +**Stopping point**: All major metrics pass. AVAssetReader panic fixed. System audio timing remains as documented known issue. + +--- + ## References - `BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/video-decode/src/avassetreader.rs b/crates/video-decode/src/avassetreader.rs index 1a021151ed4..a8db3b606f4 100644 --- a/crates/video-decode/src/avassetreader.rs +++ b/crates/video-decode/src/avassetreader.rs @@ -237,13 +237,13 @@ impl AVAssetReaderDecoder { keyframe_index: Option>, ) -> Result { let (pixel_format, width, height) = { - let input = ffmpeg::format::input(&path).unwrap(); + let input = ffmpeg::format::input(&path) + .map_err(|e| format!("Failed to open video input '{}': {e}", path.display()))?; let input_stream = input .streams() .best(ffmpeg::media::Type::Video) - .ok_or("Could not find a video stream") - .unwrap(); + .ok_or_else(|| format!("No video stream in '{}'", path.display()))?; let decoder = avcodec::Context::from_parameters(input_stream.parameters()) .map_err(|e| format!("decoder context / {e}"))? 
@@ -338,7 +338,11 @@ impl AVAssetReaderDecoder { height: u32, ) -> Result<(R, R), String> { let asset = av::UrlAsset::with_url( - &ns::Url::with_fs_path_str(path.to_str().unwrap(), false), + &ns::Url::with_fs_path_str( + path.to_str() + .ok_or_else(|| format!("Invalid UTF-8 in path: {path:?}"))?, + false, + ), None, ) .ok_or_else(|| format!("UrlAsset::with_url{{{path:?}}}"))?; From 4186535f6e7dcc3c80bb8d000bbd26f71ed00765 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sun, 15 Feb 2026 23:11:44 +0000 Subject: [PATCH 27/27] Sync system audio start_time; minor rust refactors --- apps/desktop/src-tauri/src/windows.rs | 27 +++++++-------- crates/editor/PLAYBACK-FINDINGS.md | 36 ++++++++++++++++++++ crates/recording/FINDINGS.md | 43 ++++++++++++++++++++++++ crates/recording/src/studio_recording.rs | 26 +++++++++++--- 4 files changed, 114 insertions(+), 18 deletions(-) diff --git a/apps/desktop/src-tauri/src/windows.rs b/apps/desktop/src-tauri/src/windows.rs index b6ec15f2b16..06558553f46 100644 --- a/apps/desktop/src-tauri/src/windows.rs +++ b/apps/desktop/src-tauri/src/windows.rs @@ -75,10 +75,9 @@ fn is_system_dark_mode() -> bool { let hkcu = RegKey::predef(HKEY_CURRENT_USER); if let Ok(key) = hkcu.open_subkey("Software\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize") + && let Ok(value) = key.get_value::("AppsUseLightTheme") { - if let Ok(value) = key.get_value::("AppsUseLightTheme") { - return value == 0; - } + return value == 0; } false } @@ -858,17 +857,17 @@ impl ShowCapWindow { } #[cfg(not(target_os = "macos"))] - if let Self::InProgressRecording { .. 
} = self { - if let Some(window) = self.id(app).get(app) { - let width = 320.0; - let height = 150.0; - let recording_monitor = CursorMonitorInfo::get(); - let (pos_x, pos_y) = recording_monitor.bottom_center_position(width, height, 120.0); - let _ = window.set_position(tauri::LogicalPosition::new(pos_x, pos_y)); - window.show().ok(); - window.set_focus().ok(); - return Ok(window); - } + if let Self::InProgressRecording { .. } = self + && let Some(window) = self.id(app).get(app) + { + let width = 320.0; + let height = 150.0; + let recording_monitor = CursorMonitorInfo::get(); + let (pos_x, pos_y) = recording_monitor.bottom_center_position(width, height, 120.0); + let _ = window.set_position(tauri::LogicalPosition::new(pos_x, pos_y)); + window.show().ok(); + window.set_focus().ok(); + return Ok(window); } if !matches!(self, Self::Camera { .. } | Self::InProgressRecording { .. }) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 32d23fca020..8cda6a2297b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -355,6 +355,42 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu --- +### Session 2026-02-15 (Playback Validation + System Audio Sync) + +**Goal**: Comprehensive playback benchmark validation, system audio start_time sync fix + +**What was done**: +1. Ran playback validation on fragmented and MP4 recordings +2. Verified AVAssetReader graceful fallback on directory paths (no panics) +3. Audited all decoder `unwrap()` calls for safety +4. Added system audio to recording start_time sync chain (studio_recording.rs) + +**Changes Made**: +- `crates/recording/src/studio_recording.rs`: System audio start_time now syncs to mic (or display) when drift >30ms, matching the existing camera/display sync pattern. Improves playback alignment. 
+ +**Results (MP4 Mode)**: +- ✅ Decoder: AVAssetReader (hardware), display init=162-174ms, camera init=21-32ms +- ✅ Playback: 283-641 fps effective (target ≥60fps) +- ✅ Latency: avg=1.6-3.5ms, p95=2.8-5.0ms (target p95 <50ms) +- ✅ Camera sync: 0ms drift (target <100ms) +- ✅ Mic sync: 93ms (target <100ms) +- 🟡 System audio: 178-195ms (inherent macOS capture latency, sync fix improves alignment) + +**Results (Fragmented Mode)**: +- ✅ Decoder: FFmpeg (hardware) with VideoToolbox, display init=100ms, camera init=7ms +- ✅ Playback: 156 fps effective (target ≥60fps) +- ✅ Latency: avg=6.4ms, p95=9.5ms (target p95 <50ms) +- ✅ Camera sync: 0ms drift (target <100ms) +- ✅ Mic sync: 8.5ms (target <100ms) +- ✅ System audio: 98ms (target <100ms) +- ✅ AVAssetReader cleanly falls back to FFmpeg with descriptive error message + +**Decoder audit**: All `unwrap()` calls on fallible input-opening and path-conversion code in `avassetreader.rs` eliminated. The only remaining `unwrap()` calls, in `ffmpeg.rs` and the avassetreader decoder loop, are on guaranteed-non-empty BTreeMap caches (safe by construction). + +**Stopping point**: All playback metrics healthy. System audio sync metadata fix applied. + +--- + ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/recording/FINDINGS.md b/crates/recording/FINDINGS.md index 0b3d99f8678..b2d1e137554 100644 --- a/crates/recording/FINDINGS.md +++ b/crates/recording/FINDINGS.md @@ -441,6 +441,49 @@ System Audio ────┘ ├─► MP4 (macos.rs) ─ --- +### Session 2026-02-15 (Fix Attempts + System Audio Sync) + +**Goal**: Fix known issues: MP4 encoder warmup dropped frames and system audio timing offset + +**What was done**: +1. Ran comprehensive benchmarks (MP4 cold, warm, thermal stress; fragmented) +2. Attempted encoder warmup patience fix (increasing retry budget from 50ms to 200ms during first 3 frames) +3. Reverted encoder warmup fix after it degraded performance (longer blocking caused pipeline backpressure) +4. 
Implemented system audio start_time sync to match mic/display sync chain +5. Verified all metrics stable after changes + +**Changes Made**: +- `crates/recording/src/studio_recording.rs`: Added system audio to the start_time sync chain. System audio now syncs to mic start time (preferred) or display start time when drift >30ms, matching the existing sync pattern for camera and display. Improves playback alignment of system audio. + +**Encoder Warmup Investigation**: +- Root cause: VideoToolbox hardware encoder first-frame latency (~160ms) causes `NotReadyForMore` for frames 2-5 +- Current retry budget: 100 × 500μs = 50ms. Frames during warmup are dropped after 50ms retry +- Attempted fix: 400 × 500μs = 200ms patience for first 3 frames +- Result: WORSE (71 frames instead of 149). Longer blocking prevented the encoder thread from draining the channel, causing capture-side drops from channel full +- Conclusion: 50ms retry timeout is the correct safety valve. The ~3% dropped frames during warmup is the optimal tradeoff. 
Pre-warming the hardware encoder would require architectural changes (dummy frame encoding before recording starts) + +**Results (MP4 - warm run, post system audio sync fix)**: +- ✅ Frame rate: 29.0-29.2fps (target 30±2fps) +- ✅ Jitter: 10.3-12.4ms (target <15ms) +- ✅ A/V sync: 0ms across all streams (target <50ms) +- ✅ Mic timing: 90-94ms (target <100ms) +- 🟡 Dropped frames: 2.7-3.3% (encoder warmup, not actionable without architectural changes) +- 🟡 System audio duration: 215-259ms shorter than video (inherent macOS capture latency, cannot be fixed with metadata sync) + +**Results (Fragmented)**: +- ✅ Frame rate: 29.5fps, jitter: 5.7ms, dropped: 1.3% +- ✅ Mic timing: 13.5ms +- 🟡 System audio duration: 111.5ms shorter + +**Key findings**: +- MP4 encoder warmup spike is NOT fixable by increasing retry patience (makes it worse) +- System audio file duration is inherently shorter due to macOS ScreenCaptureKit capture latency +- System audio start_time metadata sync improves playback alignment but not duration measurement + +**Stopping point**: System audio sync metadata fix applied. Encoder warmup spike documented as architectural limitation. 
+ +--- + ## References - `BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/recording/src/studio_recording.rs b/crates/recording/src/studio_recording.rs index 6a84faa3acc..4d575f67822 100644 --- a/crates/recording/src/studio_recording.rs +++ b/crates/recording/src/studio_recording.rs @@ -762,10 +762,28 @@ async fn stop_recording( start_time: mic_start_time, device_id: s.mic_device_id.clone(), }), - system_audio: s.pipeline.system_audio.map(|audio| AudioMeta { - path: make_relative(&audio.path), - start_time: Some(to_start_time(audio.first_timestamp)), - device_id: None, + system_audio: s.pipeline.system_audio.map(|audio| { + let raw_sys_start = to_start_time(audio.first_timestamp); + let sys_start_time = if let Some(mic_start) = mic_start_time { + let sync_offset = raw_sys_start - mic_start; + if sync_offset.abs() > 0.030 { + mic_start + } else { + raw_sys_start + } + } else { + let sync_offset = raw_sys_start - display_start_time; + if sync_offset.abs() > 0.030 { + display_start_time + } else { + raw_sys_start + } + }; + AudioMeta { + path: make_relative(&audio.path), + start_time: Some(sys_start_time), + device_id: None, + } }), cursor: s .pipeline