diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index e474de2d33..cc7833eb11 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -35,7 +35,7 @@ ## Current Status -**Last Updated**: 2026-03-25 +**Last Updated**: 2026-05-07 ### Performance Summary @@ -78,6 +78,7 @@ - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Optimize random-access scrubbing** - Reduced AVAssetReader scrub decode p95 on cap-performance-fixtures from 231.5ms to 47.6ms (2026-05-07) --- @@ -421,6 +422,49 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu --- +### Session 2026-05-07 (Reference Fixture Scrub Optimization) + +**Goal**: Benchmark current editor playback performance on a repeatable real `.cap` fixture and improve playback responsiveness without compromising FPS, visual quality, or audio sync. + +**What was done**: +1. Cloned `https://github.com/CapSoftware/cap-performance-fixtures` to `/tmp/cap-performance-fixtures` +2. Used `/tmp/cap-performance-fixtures/reference-recording.cap`, a two-segment Studio recording with 3024x1964 display video, 1920x1080 camera video, mic audio, system audio, cursor data, and zoom configuration +3. Ran playback validation at 60fps +4. Ran `cap-editor` decode/render/scrub pipeline benchmarks at 60fps for 300 frames +5. Tuned the macOS AVAssetReader multi-position decoder pool so scrub requests use a stricter decoder reuse window than linear playback + +**Changes Made**: +- `crates/rendering/src/decoder/multi_position.rs`: Added a custom reuse-threshold entry point for decoder selection while preserving the default playback threshold. +- `crates/rendering/src/decoder/avassetreader.rs`: During detected scrubbing, reuse an existing decoder only when it is within 0.5s behind the requested frame; otherwise reset the nearest decoder to the target keyframe. + +**Baseline Results**: +- Playback validation: PASS, AVAssetReader hardware decode, camera-display drift 0ms, mic diff 35.3ms/13.8ms, system audio diff 92.7ms/92.8ms. +- Decode-only: 730.0 fps effective, avg 1.37ms, p95 2.94ms, p99 9.34ms. +- Full pipeline 1920x1080: 140.8 fps effective, total avg 7.10ms, p95 8.73ms, p99 9.64ms. +- Scrubbing half resolution: 6.8 fps effective, decode avg 138.98ms, p95 231.49ms, p99 263.25ms, total p95 242.61ms. + +**Final Results**: +- Playback validation: PASS, AVAssetReader hardware decode, camera-display drift 0ms, mic diff 35.3ms/13.8ms, system audio diff 92.7ms/92.8ms. +- Decode-only: 722.2 fps effective, avg 1.38ms, p95 3.03ms, p99 8.31ms. +- Full pipeline 1920x1080: 147.6 fps effective, total avg 6.78ms, p95 8.37ms, p99 9.31ms. +- Scrubbing half resolution: 19.9 fps effective, decode avg 41.86ms, p95 47.57ms, p99 47.95ms, total p95 55.48ms. + +**Impact**: +- Scrub decode average improved 138.98ms → 41.86ms (-69.9%). +- Scrub decode p95 improved 231.49ms → 47.57ms (-79.4%). +- Scrub throughput improved 6.8fps → 19.9fps (2.9x). +- Linear playback, camera sync, mic sync, and system-audio sync remained healthy. + +**Validation**: +- `cargo fmt --all` +- `cargo run -p cap-recording --example playback-test-runner -- --recording-path /tmp/cap-performance-fixtures/reference-recording.cap --fps 60 full` +- `cargo run -p cap-editor --example playback-pipeline-benchmark -- --recording-path /tmp/cap-performance-fixtures/reference-recording.cap --fps 60 --frames 300` +- `cargo clippy -p cap-rendering --all-targets -- -D warnings` + +**Stopping point**: Scrubbing is substantially faster and playback validation still passes. Remaining architectural opportunities are renderer readback/transport overhead and longer-duration testing on lower-powered MacBook Air hardware. + +--- + ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 3f3decdbfa..96905f85f3 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -20,6 +20,7 @@ use super::multi_position::{DecoderPoolManager, MultiPositionDecoderConfig, Scru use super::{DecoderInitResult, DecoderType, FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame}; const MAX_RELAXED_FALLBACK_DISTANCE: u32 = 8; +const SCRUB_REUSE_THRESHOLD_SECS: f32 = 0.5; #[derive(Clone)] struct FrameData { @@ -498,11 +499,19 @@ impl AVAssetReaderDecoder { }) } - fn select_best_decoder(&mut self, requested_time: f32) -> (usize, bool) { + fn select_best_decoder(&mut self, requested_time: f32, is_scrubbing: bool) -> (usize, bool) { let decoder_count = self.decoders.len(); - let (best_id, _distance, needs_reset) = self - .pool_manager - .find_best_decoder_for_time(requested_time, decoder_count); + let (best_id, _distance, needs_reset) = if is_scrubbing { + self.pool_manager + .find_best_decoder_for_time_with_reuse_threshold( + requested_time, + decoder_count, + SCRUB_REUSE_THRESHOLD_SECS, + ) + } else { + self.pool_manager + .find_best_decoder_for_time(requested_time, decoder_count) + }; let decoder_idx = best_id.min(decoder_count.saturating_sub(1)); @@ -639,7 +648,7 @@ impl AVAssetReaderDecoder { let requested_frame = min_requested_frame; let requested_time = requested_frame as f32 / fps as f32; - let (decoder_idx, was_reset) = this.select_best_decoder(requested_time); + let (decoder_idx, was_reset) = this.select_best_decoder(requested_time, is_scrubbing); let cache_min = if was_reset { min_requested_frame.saturating_sub(FRAME_CACHE_SIZE as u32 * 2) diff --git a/crates/rendering/src/decoder/multi_position.rs b/crates/rendering/src/decoder/multi_position.rs index 98e7cd97eb..e8c904a9b4 100644 --- a/crates/rendering/src/decoder/multi_position.rs +++ b/crates/rendering/src/decoder/multi_position.rs @@ -135,6 +135,19 @@ impl DecoderPoolManager { &mut self, requested_time: f32, decoder_count: usize, + ) -> (usize, f32, bool) { + self.find_best_decoder_for_time_with_reuse_threshold( + requested_time, + decoder_count, + self.reposition_threshold, + ) + } + + pub fn find_best_decoder_for_time_with_reuse_threshold( + &mut self, + requested_time: f32, + decoder_count: usize, + reuse_threshold: f32, ) -> (usize, f32, bool) { self.total_accesses += 1; @@ -150,6 +163,7 @@ impl DecoderPoolManager { let mut best_decoder_id = 0; let mut best_distance = f32::MAX; let mut needs_reset = true; + let reuse_threshold = reuse_threshold.clamp(0.0, self.reposition_threshold); if decoder_count == 0 { return (0, f32::MAX, true); @@ -158,7 +172,7 @@ impl DecoderPoolManager { for position in self.positions.iter().filter(|p| p.id < decoder_count) { let distance = (position.position_secs - requested_time).abs(); let is_usable = position.position_secs <= requested_time - && (requested_time - position.position_secs) < self.reposition_threshold; + && (requested_time - position.position_secs) < reuse_threshold; if is_usable && distance < best_distance { best_distance = distance; @@ -320,6 +334,7 @@ impl Default for ScrubDetector { #[cfg(test)] mod tests { use super::*; + use std::path::PathBuf; #[test] fn test_calculate_optimal_pool_size_short_video() { @@ -384,4 +399,23 @@ mod tests { ); assert_eq!(calculate_reposition_threshold(duration_55_min), 10.0); } + + #[test] + fn test_custom_reuse_threshold_forces_reset_when_decoder_is_too_far_behind() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let config = MultiPositionDecoderConfig { + path: PathBuf::from("fixture.mp4"), + tokio_handle: runtime.handle().clone(), + keyframe_index: None, + fps: 60, + duration_secs: 20.0, + }; + let mut manager = DecoderPoolManager::new(config); + + let (decoder_id, _, needs_reset) = + manager.find_best_decoder_for_time_with_reuse_threshold(6.0, 5, 0.5); + + assert_eq!(decoder_id, 1); + assert!(needs_reset); + } }