diff --git a/CHANGELOG.md b/CHANGELOG.md
index a24f38f41..cb078aafa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,8 @@
 - CoreAudio: Add `i8`, `i32` and `I24` sample format support (24-bit samples stored in 4 bytes).
 - CoreAudio: Add support for loopback recording (recording system audio output) on macOS.
 - CoreAudio: Update `mach2` to 0.5.
+- CoreAudio: Configure the device buffer to ensure predictable callback buffer sizes.
+- CoreAudio: Improve timestamp accuracy by deriving stream latency from the device buffer size.
 - iOS: Fix example by properly activating audio session.
 - WASAPI: Expose `IMMDevice` from WASAPI host Device.
 - WASAPI: Add `I24` and `U24` sample format support (24-bit samples stored in 4 bytes).
diff --git a/src/host/coreaudio/ios/mod.rs b/src/host/coreaudio/ios/mod.rs
index 753bbd099..f3ad2834c 100644
--- a/src/host/coreaudio/ios/mod.rs
+++ b/src/host/coreaudio/ios/mod.rs
@@ -1,17 +1,31 @@
 //!
-//! coreaudio on iOS looks a bit different from macOS. A lot of configuration needs to use
-//! the AVAudioSession objc API which doesn't exist on macOS.
+//! CoreAudio implementation for iOS using RemoteIO Audio Units.
 //!
-//! TODO:
-//! - Use AVAudioSession to enumerate buffer size / sample rate / number of channels and set
-//!   buffer size.
+//! ## Implementation Details
 //!
+//! This implementation uses **RemoteIO Audio Units** to interface with iOS audio hardware:
+//!
+//! - **RemoteIO**: A special Audio Unit that acts as a proxy to the actual hardware
+//! - **Direct queries**: Buffer sizes are queried directly from the RemoteIO unit
+//! - **System control**: iOS controls buffer sizes, sample rates, and device routing
+//! - **Single device model**: iOS presents audio as a single system-managed device
+//!
+//! ## Limitations
+//!
+//! - **No device enumeration**: iOS doesn't allow direct hardware device access
+//! - **No fixed buffer sizes**: `BufferSize::Fixed` returns `StreamConfigNotSupported`
+//! - **System-determined parameters**: Buffer sizes and sample rates are set by iOS
+
+// TODO:
+// - Use AVAudioSession to enumerate buffer size / sample rate / number of channels and set
+//   buffer size.
 use std::cell::RefCell;
 
 use coreaudio::audio_unit::render_callback::data;
 use coreaudio::audio_unit::{render_callback, AudioUnit, Element, Scope};
 use objc2_audio_toolbox::{kAudioOutputUnitProperty_EnableIO, kAudioUnitProperty_StreamFormat};
+use objc2_core_audio::kAudioDevicePropertyBufferFrameSize;
 use objc2_core_audio_types::{AudioBuffer, AudioStreamBasicDescription};
 
 use super::{asbd_from_config, frames_to_duration, host_time_to_stream_instant};
@@ -197,6 +211,10 @@ impl DeviceTrait for Device {
             BufferSize::Default => (),
         }
 
+        // Query the actual device buffer size for more accurate latency calculation. On iOS,
+        // BufferSize::Fixed is not supported, so this always gets the current device buffer size.
+        let device_buffer_frames = get_device_buffer_frame_size(&audio_unit).ok();
+
         // Register the callback that is being called by coreaudio whenever it needs data to be
         // fed to the audio buffer.
         let bytes_per_channel = sample_format.sample_size();
@@ -218,7 +236,6 @@ impl DeviceTrait for Device {
             let len = (data_byte_size as usize / bytes_per_channel) as usize;
             let data = Data::from_parts(data, len, sample_format);
 
-            // TODO: Need a better way to get delay, for now we assume a double-buffer offset.
             let callback = match host_time_to_stream_instant(args.time_stamp.mHostTime) {
                 Err(err) => {
                     error_callback(err.into());
@@ -227,7 +244,12 @@
                 Ok(cb) => cb,
             };
             let buffer_frames = len / channels as usize;
-            let delay = frames_to_duration(buffer_frames, sample_rate);
+            // Use the device buffer size for latency calculation if available
+            let latency_frames = device_buffer_frames.unwrap_or(
+                // Fall back to the callback buffer size if the device buffer size is unknown
+                buffer_frames,
+            );
+            let delay = frames_to_duration(latency_frames, sample_rate);
             let capture = callback
                 .sub(delay)
                 .expect("`capture` occurs before origin of alsa `StreamInstant`");
@@ -276,6 +298,10 @@ impl DeviceTrait for Device {
         let asbd = asbd_from_config(config, sample_format);
         audio_unit.set_property(kAudioUnitProperty_StreamFormat, scope, element, Some(&asbd))?;
 
+        // Query the actual device buffer size for more accurate latency calculation. On iOS,
+        // BufferSize::Fixed is not supported, so this always gets the current device buffer size.
+        let device_buffer_frames = get_device_buffer_frame_size(&audio_unit).ok();
+
         // Register the callback that is being called by coreaudio whenever it needs data to be
         // fed to the audio buffer.
         let bytes_per_channel = sample_format.sample_size();
@@ -302,9 +328,13 @@
                 }
                 Ok(cb) => cb,
             };
-            // TODO: Need a better way to get delay, for now we assume a double-buffer offset.
             let buffer_frames = len / channels as usize;
-            let delay = frames_to_duration(buffer_frames, sample_rate);
+            // Use the device buffer size for latency calculation if available
+            let latency_frames = device_buffer_frames.unwrap_or(
+                // Fall back to the callback buffer size if the device buffer size is unknown
+                buffer_frames,
+            );
+            let delay = frames_to_duration(latency_frames, sample_rate);
             let playback = callback
                 .add(delay)
                 .expect("`playback` occurs beyond representation supported by `StreamInstant`");
@@ -427,3 +457,18 @@ fn stream_config_from_asbd(asbd: AudioStreamBasicDescription) -> SupportedStream
         sample_format: SUPPORTED_SAMPLE_FORMAT,
     }
 }
+
+/// Query the current device buffer frame size from CoreAudio.
+///
+/// On iOS, this queries the RemoteIO audio unit, which acts as a proxy to the hardware.
+/// RemoteIO uses Global scope because it represents the system-wide audio session,
+/// not a specific hardware device like on macOS.
+fn get_device_buffer_frame_size(audio_unit: &AudioUnit) -> Result<u32, coreaudio::Error> {
+    // For iOS RemoteIO, we query the global scope since RemoteIO represents
+    // the system audio session rather than direct hardware access.
+    audio_unit.get_property::<u32>(
+        kAudioDevicePropertyBufferFrameSize,
+        Scope::Global,
+        Element::Output,
+    )
+}
diff --git a/src/host/coreaudio/macos/device.rs b/src/host/coreaudio/macos/device.rs
index 177f82914..bdcecd270 100644
--- a/src/host/coreaudio/macos/device.rs
+++ b/src/host/coreaudio/macos/device.rs
@@ -694,40 +694,34 @@ impl Device {
             audio_unit_from_device(&loopback_aggregate.as_ref().unwrap().aggregate_device, true)?
         };
 
-        // Set the stream in interleaved mode.
-        let asbd = asbd_from_config(config, sample_format);
-        audio_unit.set_property(kAudioUnitProperty_StreamFormat, scope, element, Some(&asbd))?;
-
-        // Set the buffersize
-        match config.buffer_size {
-            BufferSize::Fixed(v) => {
-                let buffer_size_range = get_io_buffer_frame_size_range(&audio_unit)?;
-                match buffer_size_range {
-                    SupportedBufferSize::Range { min, max } => {
-                        if v >= min && v <= max {
-                            audio_unit.set_property(
-                                kAudioDevicePropertyBufferFrameSize,
-                                scope,
-                                element,
-                                Some(&v),
-                            )?
-                        } else {
-                            return Err(BuildStreamError::StreamConfigNotSupported);
-                        }
-                    }
-                    SupportedBufferSize::Unknown => (),
-                }
-            }
-            BufferSize::Default => (),
-        }
+        // Configure the device buffer to ensure predictable callback behavior and accurate latency.
+        //
+        // CoreAudio double-buffering model:
+        // - CPAL buffer size (from user) = total buffer size that CPAL manages
+        // - Device buffer size = actual hardware buffer size (CPAL buffer size / 2)
+        // - Callback buffer size = size of each callback invocation (≈ device buffer size)
+        //
+        // CoreAudio automatically delivers callbacks with buffer_size ≈ device_buffer_size.
+        // To ensure applications receive callbacks of the size they requested,
+        // we configure device_buffer_size = requested_buffer_size / 2.
+        //
+        // This provides:
+        // - Predictable callback buffer sizes matching application requests
+        // - Efficient double-buffering (device buffer + callback buffer)
+        // - Low latency determined by the device buffer size
+        //
+        // For the latency calculation, we need the device buffer size, not the callback buffer
+        // size, because latency represents the delay from when audio is written to when it's heard.
+        configure_stream_format_and_buffer(&mut audio_unit, config, sample_format, scope, element)?;
 
         let error_callback = Arc::new(Mutex::new(error_callback));
         let error_callback_disconnect = error_callback.clone();
 
         // Register the callback that is being called by coreaudio whenever it needs data to be
         // fed to the audio buffer.
-        let bytes_per_channel = sample_format.sample_size();
-        let sample_rate = config.sample_rate;
+        let (bytes_per_channel, sample_rate, device_buffer_frames) =
+            setup_callback_vars(&audio_unit, config, sample_format, scope, element);
+
         type Args = render_callback::Args<data::Raw>;
         audio_unit.set_input_callback(move |args: Args| unsafe {
             let ptr = (*args.data.data).mBuffers.as_ptr();
@@ -745,7 +739,6 @@ impl Device {
             let len = data_byte_size as usize / bytes_per_channel;
             let data = Data::from_parts(data, len, sample_format);
 
-            // TODO: Need a better way to get delay, for now we assume a double-buffer offset.
             let callback = match host_time_to_stream_instant(args.time_stamp.mHostTime) {
                 Err(err) => {
                     (error_callback.lock().unwrap())(err.into());
@@ -754,7 +747,13 @@
                 Ok(cb) => cb,
            };
             let buffer_frames = len / channels as usize;
-            let delay = frames_to_duration(buffer_frames, sample_rate);
+            // Use the device buffer size for latency calculation if available
+            let latency_frames = device_buffer_frames.unwrap_or(
+                // Fall back to the callback buffer size if the device buffer size is unknown
+                // (may overestimate latency for BufferSize::Default)
+                buffer_frames,
+            );
+            let delay = frames_to_duration(latency_frames, sample_rate);
             let capture = callback
                 .sub(delay)
                 .expect("`capture` occurs before origin of alsa `StreamInstant`");
@@ -802,40 +801,17 @@
         let scope = Scope::Input;
         let element = Element::Output;
 
-        // Set the stream in interleaved mode.
-        let asbd = asbd_from_config(config, sample_format);
-        audio_unit.set_property(kAudioUnitProperty_StreamFormat, scope, element, Some(&asbd))?;
-
-        // Set the buffersize
-        match config.buffer_size {
-            BufferSize::Fixed(v) => {
-                let buffer_size_range = get_io_buffer_frame_size_range(&audio_unit)?;
-                match buffer_size_range {
-                    SupportedBufferSize::Range { min, max } => {
-                        if v >= min && v <= max {
-                            audio_unit.set_property(
-                                kAudioDevicePropertyBufferFrameSize,
-                                scope,
-                                element,
-                                Some(&v),
-                            )?
-                        } else {
-                            return Err(BuildStreamError::StreamConfigNotSupported);
-                        }
-                    }
-                    SupportedBufferSize::Unknown => (),
-                }
-            }
-            BufferSize::Default => (),
-        }
+        // Configure the device buffer (see the detailed documentation in the input stream above)
+        configure_stream_format_and_buffer(&mut audio_unit, config, sample_format, scope, element)?;
 
         let error_callback = Arc::new(Mutex::new(error_callback));
         let error_callback_disconnect = error_callback.clone();
 
         // Register the callback that is being called by coreaudio whenever it needs data to be
         // fed to the audio buffer.
-        let bytes_per_channel = sample_format.sample_size();
-        let sample_rate = config.sample_rate;
+        let (bytes_per_channel, sample_rate, device_buffer_frames) =
+            setup_callback_vars(&audio_unit, config, sample_format, scope, element);
+
         type Args = render_callback::Args<data::Raw>;
         audio_unit.set_render_callback(move |args: Args| unsafe {
             // If `run()` is currently running, then a callback will be available from this list.
@@ -858,9 +834,14 @@
                 }
                 Ok(cb) => cb,
             };
-            // TODO: Need a better way to get delay, for now we assume a double-buffer offset.
             let buffer_frames = len / channels as usize;
-            let delay = frames_to_duration(buffer_frames, sample_rate);
+            // Use the device buffer size for latency calculation if available
+            let latency_frames = device_buffer_frames.unwrap_or(
+                // Fall back to the callback buffer size if the device buffer size is unknown
+                // (may overestimate latency for BufferSize::Default)
+                buffer_frames,
+            );
+            let delay = frames_to_duration(latency_frames, sample_rate);
             let playback = callback
                 .add(delay)
                 .expect("`playback` occurs beyond representation supported by `StreamInstant`");
@@ -890,3 +871,77 @@
         Ok(stream)
     }
 }
+
+/// Configure the stream format and buffer size for a CoreAudio stream.
+///
+/// This handles the common setup tasks for both input and output streams:
+/// - Sets the stream format (ASBD)
+/// - Configures the buffer size for `BufferSize::Fixed` requests
+/// - Validates the buffer size against the supported range
+fn configure_stream_format_and_buffer(
+    audio_unit: &mut AudioUnit,
+    config: &StreamConfig,
+    sample_format: SampleFormat,
+    scope: Scope,
+    element: Element,
+) -> Result<(), BuildStreamError> {
+    // Set the stream in interleaved mode
+    let asbd = asbd_from_config(config, sample_format);
+    audio_unit.set_property(kAudioUnitProperty_StreamFormat, scope, element, Some(&asbd))?;
+
+    // Configure the device buffer size if requested
+    match config.buffer_size {
+        BufferSize::Fixed(cpal_buffer_size) => {
+            let buffer_size_range = get_io_buffer_frame_size_range(audio_unit)?;
+            let device_buffer_size = cpal_buffer_size / 2;
+
+            if let SupportedBufferSize::Range { min, max } = buffer_size_range {
+                if !(min..=max).contains(&device_buffer_size) {
+                    // The calculated device buffer size doesn't fit in the supported range.
+                    // This means the requested cpal_buffer_size is too small or too large for
+                    // this device.
+                    return Err(BuildStreamError::StreamConfigNotSupported);
+                }
+            }
+            audio_unit.set_property(
+                kAudioDevicePropertyBufferFrameSize,
+                scope,
+                element,
+                Some(&device_buffer_size),
+            )?;
+        }
+        BufferSize::Default => (),
+    }
+
+    Ok(())
+}
+
+/// Set up common callback variables and query the device buffer size.
+///
+/// Returns `(bytes_per_channel, sample_rate, device_buffer_frames)`.
+fn setup_callback_vars(
+    audio_unit: &AudioUnit,
+    config: &StreamConfig,
+    sample_format: SampleFormat,
+    scope: Scope,
+    element: Element,
+) -> (usize, crate::SampleRate, Option<u32>) {
+    let bytes_per_channel = sample_format.sample_size();
+    let sample_rate = config.sample_rate;
+
+    // Query the actual device buffer size for the latency calculation.
+    // For Fixed: verifies CoreAudio actually set what we requested
+    // For Default: gets the device's current buffer size
+    let device_buffer_frames = get_device_buffer_frame_size(audio_unit, scope, element).ok();
+
+    (bytes_per_channel, sample_rate, device_buffer_frames)
+}
+
+/// Query the current device buffer frame size from CoreAudio.
+fn get_device_buffer_frame_size(
+    audio_unit: &AudioUnit,
+    scope: Scope,
+    element: Element,
+) -> Result<u32, coreaudio::Error> {
+    audio_unit.get_property::<u32>(kAudioDevicePropertyBufferFrameSize, scope, element)
+}
diff --git a/src/host/coreaudio/macos/mod.rs b/src/host/coreaudio/macos/mod.rs
index e29d05768..f7af72819 100644
--- a/src/host/coreaudio/macos/mod.rs
+++ b/src/host/coreaudio/macos/mod.rs
@@ -223,4 +223,107 @@ mod test {
             *sample = Sample::EQUILIBRIUM;
         }
     }
+
+    #[test]
+    #[cfg(target_os = "macos")]
+    fn test_buffer_size_equivalence() {
+        use crate::{BufferSize, SampleRate, StreamConfig};
+        use std::sync::{Arc, Mutex};
+        use std::time::Duration;
+
+        let host = default_host();
+        let device = host.default_output_device().unwrap();
+
+        // First, test with BufferSize::Default to see what we get
+        let default_config = StreamConfig {
+            channels: 2,
+            sample_rate: SampleRate(48000),
+            buffer_size: BufferSize::Default,
+        };
+
+        // Capture actual buffer sizes from callbacks
+        let default_buffer_sizes = Arc::new(Mutex::new(Vec::new()));
+        let default_buffer_sizes_clone = default_buffer_sizes.clone();
+
+        let default_stream = device
+            .build_output_stream(
+                &default_config,
+                move |data: &mut [f32], info: &crate::OutputCallbackInfo| {
+                    let mut sizes = default_buffer_sizes_clone.lock().unwrap();
+                    if sizes.len() < 10 {
+                        // Collect the first 10 callback buffer sizes
+                        sizes.push(data.len());
+                    }
+                    write_silence(data, info);
+                },
+                move |err| println!("Error: {err}"),
+                None,
+            )
+            .unwrap();
+
+        default_stream.play().unwrap();
+        std::thread::sleep(Duration::from_millis(200));
+        default_stream.pause().unwrap();
+
+        let default_sizes = default_buffer_sizes.lock().unwrap().clone();
+        assert!(
+            !default_sizes.is_empty(),
+            "Should have captured some buffer sizes"
+        );
+
+        // Get the typical buffer size (callback sizes should be consistent within a stream)
+        let typical_buffer_size = default_sizes[0];
+
+        // Now test with BufferSize::Fixed using double the callback buffer size.
+        // Per the double-buffering model: cpal_buffer_size = 2 * device_buffer_size ≈ 2 * callback_buffer_size
+        let fixed_cpal_buffer_size = typical_buffer_size * 2;
+        let fixed_config = StreamConfig {
+            channels: 2,
+            sample_rate: SampleRate(48000),
+            buffer_size: BufferSize::Fixed(fixed_cpal_buffer_size as u32),
+        };
+
+        let fixed_buffer_sizes = Arc::new(Mutex::new(Vec::new()));
+        let fixed_buffer_sizes_clone = fixed_buffer_sizes.clone();
+
+        let fixed_stream = device
+            .build_output_stream(
+                &fixed_config,
+                move |data: &mut [f32], info: &crate::OutputCallbackInfo| {
+                    let mut sizes = fixed_buffer_sizes_clone.lock().unwrap();
+                    if sizes.len() < 10 {
+                        sizes.push(data.len());
+                    }
+                    write_silence(data, info);
+                },
+                move |err| println!("Error: {err}"),
+                None,
+            )
+            .unwrap();
+
+        fixed_stream.play().unwrap();
+        std::thread::sleep(Duration::from_millis(200));
+        fixed_stream.pause().unwrap();
+
+        let fixed_sizes = fixed_buffer_sizes.lock().unwrap().clone();
+        assert!(
+            !fixed_sizes.is_empty(),
+            "Should have captured some buffer sizes"
+        );
+
+        let fixed_typical_size = fixed_sizes[0];
+
+        // The key check: the callback buffer sizes should be approximately equal.
+        // This validates the fallback assumption: callback_buffer_size ≈ device_buffer_size
+        let size_difference = (typical_buffer_size as i32 - fixed_typical_size as i32).abs();
+        let tolerance = typical_buffer_size / 10; // 10% tolerance
+
+        assert!(
+            size_difference <= tolerance as i32,
+            "Buffer sizes should be approximately equal: Default={}, Fixed={}, Difference={}",
+            typical_buffer_size,
+            fixed_typical_size,
+            size_difference
+        );
+    }
 }
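
Note for reviewers (not part of the patch): the halving relationship documented above can also be observed from the application side through the public cpal API. The sketch below is illustrative only; it assumes the default output device accepts 48 kHz stereo `f32`, requests `BufferSize::Fixed(1024)` (so under this scheme the device buffer should be configured to 512 frames), and prints the per-callback frame count together with the reported latency, which this patch now derives from the device buffer size.

```rust
// Illustrative sketch against the public cpal API; device/format support is assumed.
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::{BufferSize, SampleRate, StreamConfig};

fn main() {
    let host = cpal::default_host();
    let device = host
        .default_output_device()
        .expect("no default output device");

    // Assumes 48 kHz stereo f32 is supported; a real application would pick a
    // config from `device.supported_output_configs()`.
    let config = StreamConfig {
        channels: 2,
        sample_rate: SampleRate(48_000),
        buffer_size: BufferSize::Fixed(1024), // CPAL size; device buffer becomes ~512 frames
    };

    let stream = device
        .build_output_stream(
            &config,
            move |data: &mut [f32], info: &cpal::OutputCallbackInfo| {
                // Frames per callback = samples / channels; expected ≈ 512 under the
                // double-buffering scheme described in the diff.
                let frames = data.len() / 2;
                // Reported latency = playback instant minus callback instant; with this
                // patch it reflects the device buffer size rather than the callback size.
                let ts = info.timestamp();
                let latency = ts.playback.duration_since(&ts.callback);
                println!("callback frames: {frames}, latency: {latency:?}");
                data.fill(0.0); // output silence
            },
            |err| eprintln!("stream error: {err}"),
            None,
        )
        .expect("failed to build output stream");

    stream.play().expect("failed to start stream");
    std::thread::sleep(std::time::Duration::from_millis(300));
}
```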