From 211000084e0179d7e087c31a7214d72ac8b65bb0 Mon Sep 17 00:00:00 2001 From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:45:54 +0200 Subject: [PATCH 1/3] Configure native audio source from device, not hardcoded defaults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The native (Rust) audio source was created with a hardcoded sample rate (48000) and channel count (2). Microphone frames flow through Unity's audio graph (AudioProbe) at the actual DSP output configuration, which often differs — e.g. with a Bluetooth headset. The Rust source does not resample; it rejects frames whose rate/channels don't match, causing the metadata-mismatch warning and capture failures. Read the source's sample rate and channel count from Unity's output configuration (AudioSettings.GetConfiguration) instead of hardcoded defaults, falling back to the defaults only when Unity can't report one. The base constructor now exposes a device-mode overload (type only) and an explicit overload (type, sampleRate, channels) for sources that generate a fixed format. MicrophoneSource and BasicAudioSource use device mode; BasicAudioSource drops its unused channels parameter. SineWaveAudioSource declares its exact format. If a frame's format still doesn't match (inconsistent Unity report or a runtime output change), drop it with a throttled warning instead of sending a mismatch the native side would error on. Also removes the redundant Microphone.Start in the Meet sample. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Runtime/Scripts/BasicAudioSource.cs | 6 +- Runtime/Scripts/MicrophoneSource.cs | 2 +- Runtime/Scripts/RtcAudioSource.cs | 81 ++++++++++++++++++--- Samples~/Meet/Assets/Runtime/MeetManager.cs | 3 +- Tests/PlayMode/Utils/SineWaveAudioSource.cs | 2 +- 5 files changed, 79 insertions(+), 15 deletions(-) diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs index 3b63680b..8193090d 100644 --- a/Runtime/Scripts/BasicAudioSource.cs +++ b/Runtime/Scripts/BasicAudioSource.cs @@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource /// Creates a new basic audio source for the given in the scene. /// /// The to capture from. - /// The number of channels to capture. /// The type of audio source. - public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType) + /// + /// The sample rate and channel count are taken from Unity's audio configuration. + /// + public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType) { _source = source; } diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index 904b8da7..a8775568 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ b/Runtime/Scripts/MicrophoneSource.cs @@ -28,7 +28,7 @@ sealed public class MicrophoneSource : RtcAudioSource /// get the list of available devices. /// The GameObject to attach the AudioSource to. The object must be kept in the scene /// for the duration of the source's lifetime. 
- public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone) + public MicrophoneSource(string deviceName, GameObject sourceObject) : base(RtcAudioSourceType.AudioSourceMicrophone) { _deviceName = deviceName; _sourceObject = sourceObject; diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index a9af8a0a..7f5c3d7c 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -83,20 +83,33 @@ private sealed class PendingAudioFrame private volatile bool _disposed = false; private int _audioReadCount = 0; - protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom) + // Device-capture sources (microphone, AudioSource taps) don't know their format ahead of + // time — it is whatever Unity's audio graph delivers. They use this constructor, which + // configures the native source from Unity's current output configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType) + : this(audioSourceType, 0, 0) { } + + // Sources that generate a fixed, known format (e.g. test signal generators) declare it + // directly. Passing 0 for either value falls back to the device configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels) { _sourceType = audioSourceType; - _expectedChannels = (uint)channels; + + if (sampleRate > 0 && channels > 0) + { + _expectedSampleRate = sampleRate; + _expectedChannels = channels; + } + else + { + (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat(); + } using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; - newAudioSource.NumChannels = (uint)channels; - newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ? 
-                DefaultMicrophoneSampleRate : DefaultSampleRate;
-            _expectedSampleRate = newAudioSource.SampleRate;
-
-            Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}");
+            newAudioSource.NumChannels = _expectedChannels;
+            newAudioSource.SampleRate = _expectedSampleRate;
 
             newAudioSource.Options = request.TempResource();
             newAudioSource.Options.EchoCancellation = true;
@@ -109,6 +122,49 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType =
             Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}");
         }
 
+        // Works out the format for device-capture sources from Unity's output configuration.
+        // Captured buffers arrive at the DSP output rate/channel count (see AudioProbe), so the
+        // native source has to be created with that same format. The platform defaults are kept
+        // when Unity cannot report a configuration (e.g. batch mode without an audio device).
+        private (uint sampleRate, uint channels) ResolveDeviceFormat()
+        {
+            var isMicrophone = _sourceType == RtcAudioSourceType.AudioSourceMicrophone;
+            uint resolvedRate = isMicrophone ? DefaultMicrophoneSampleRate : DefaultSampleRate;
+            uint resolvedChannels = DefaultChannels;
+
+            try
+            {
+                var unityConfig = UnityEngine.AudioSettings.GetConfiguration();
+                var reportedChannels = SpeakerModeChannels(unityConfig.speakerMode);
+                if (unityConfig.sampleRate > 0)
+                    resolvedRate = (uint)unityConfig.sampleRate;
+                if (reportedChannels > 0)
+                    resolvedChannels = reportedChannels;
+            }
+            catch (Exception e)
+            {
+                Utils.Warning($"{DebugTag} could not read Unity audio configuration, using defaults: {e.Message}");
+            }
+
+            return (resolvedRate, resolvedChannels);
+        }
+
+        // Channel count for a Unity speaker mode; 0 for any mode not listed, so the caller keeps its default.
+        private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode)
+        {
+            return mode switch
+            {
+                UnityEngine.AudioSpeakerMode.Mono => 1u,
+                UnityEngine.AudioSpeakerMode.Stereo => 2u,
+                UnityEngine.AudioSpeakerMode.Quad => 4u,
+                UnityEngine.AudioSpeakerMode.Surround => 5u,
+                UnityEngine.AudioSpeakerMode.Mode5point1 => 6u,
+                UnityEngine.AudioSpeakerMode.Mode7point1 => 8u,
+                UnityEngine.AudioSpeakerMode.Prologic => 2u,
+                _ => 0u,
+            };
+        }
+
         ///
         /// Begin capturing audio samples from the underlying source.
         ///
@@ -153,9 +209,16 @@ private void OnAudioRead(float[] data, int channels, int sampleRate)
                 return;
             }
 
+            // The native source rejects frames whose rate/channels differ from how it was
+            // configured (it does not resample). This should not happen now that the source is
+            // configured from the device, but if Unity reports an inconsistent format — or the
+            // output configuration changes at runtime — we drop the frame instead of sending a
+            // mismatch the native side would error on.
if ((uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels) { - Utils.Warning($"{DebugTag} audio frame #{frameIndex} metadata mismatch actualRate={sampleRate} actualChannels={channels} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); + if (frameIndex == 1 || frameIndex % 100 == 0) + Utils.Warning($"{DebugTag} dropping audio frame #{frameIndex}: format {sampleRate}/{channels} does not match source {_expectedSampleRate}/{_expectedChannels} (sourceType={_sourceType})"); + return; } var pendingBeforeSend = PendingFrameCount(); diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs index 225c7a0c..97b2cb70 100644 --- a/Samples~/Meet/Assets/Runtime/MeetManager.cs +++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs @@ -453,8 +453,7 @@ private IEnumerator PublishLocalMicrophone() { if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break; - Microphone.Start(null, true, 10, 44100); - + // MicrophoneSource starts the device itself, so we only need the device name here. 
var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}"); audioObject.transform.SetParent(_audioTrackParent); diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs index 907e9ccc..2337615b 100644 --- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs +++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs @@ -31,7 +31,7 @@ public SineWaveAudioSource( int sampleRate = 48000, double frequencyHz = 440.0, float amplitude = 0.1f) - : base(channels, RtcAudioSourceType.AudioSourceCustom) + : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels) { _channels = channels; _sampleRate = sampleRate; From 834f2047ccf7b8a0a848b8b0518931462256ad4c Mon Sep 17 00:00:00 2001 From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com> Date: Fri, 12 Jun 2026 10:12:35 +0200 Subject: [PATCH 2/3] Add throttled capture/receive rate diagnostics (Info level) Temporary, ~2s-throttled diagnostics to investigate choppy received audio: - RtcAudioSource logs the effective capture sample rate (samples/sec by wall clock) vs the rate declared to the native source. A measured rate that differs from the declared rate means the frame format label is wrong, which would sound fast/slow/choppy on the receiver. - AudioStream logs buffer fill, underrun count, callback count and frames received, to distinguish receive-side starvation from a clean stream. Emitted via Utils.Info so they appear without LK_DEBUG (Utils.Debug is compiled out unless LK_DEBUG is defined). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Runtime/Scripts/AudioStream.cs | 31 ++++++++++++++++++++++++++++ Runtime/Scripts/RtcAudioSource.cs | 34 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/Runtime/Scripts/AudioStream.cs b/Runtime/Scripts/AudioStream.cs index 4d9ab588..3288f839 100644 --- a/Runtime/Scripts/AudioStream.cs +++ b/Runtime/Scripts/AudioStream.cs @@ -50,6 +50,14 @@ public sealed class AudioStream : IDisposable private const int CrossfadeFrames = 128; // ~2.7ms @ 48kHz private int _skipCooldown = 0; + // --- Temporary receive diagnostics (Info level, emitted ~every 2s) --- + // Reveals whether choppiness is a buffer-starvation problem (underruns/low fill) versus a + // clean stream, and what rate/channels we are actually playing/requesting. + private long _diagWindowStartTicks; + private int _diagCallbacks; + private int _diagUnderruns; + private int _diagFramesReceived; + /// /// Creates a new audio stream from a remote audio track, attaching it to the /// given in the scene. @@ -147,6 +155,8 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) lock (_lock) { + MaybeLogReceiveDiagnostics(channels, sampleRate); + // Single gate covering first-create and runtime format changes (e.g. after a // system audio device switch). When the FFI stream is missing or what we asked // Rust for no longer matches what Unity is delivering, post a (re)create to the @@ -214,6 +224,7 @@ static float S16ToFloat(short v) if (valuesAvailableToRead < data.Length) { _isPrimed = false; + _diagUnderruns++; Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples but want to read {data.Length})"); // Output silence immediately instead of playing partial/choppy samples. 
@@ -370,6 +381,7 @@ private void OnAudioStreamEvent(AudioStreamEvent e)
                     var data = new ReadOnlySpan(frame.Data.ToPointer(), frame.Length);
                     _buffer.Write(data);
                 }
+                _diagFramesReceived++;
             }
         }
 
@@ -427,6 +439,25 @@ private void Dispose(bool disposing)
             Dispose(false);
         }
 
+        // Temporary diagnostic: counts callbacks and, once at least 2s have elapsed, logs buffer fill,
+        // underruns, callbacks and frames received, then resets the window. Called under _lock.
+        private void MaybeLogReceiveDiagnostics(int channels, int sampleRate)
+        {
+            var timestamp = System.Diagnostics.Stopwatch.GetTimestamp();
+            _diagCallbacks++;
+            if (_diagWindowStartTicks == 0)
+                _diagWindowStartTicks = timestamp;
+            var elapsed = (timestamp - _diagWindowStartTicks) / (double)System.Diagnostics.Stopwatch.Frequency;
+            if (elapsed < 2.0)
+                return;
+
+            float fill = _buffer == null ? 0f : _buffer.AvailableReadInPercent();
+            Utils.Info($"AudioStream#{_trackHandleId} diag: out={sampleRate}Hz/{channels}ch ffi={_ffiSampleRate}Hz/{_ffiNumChannels}ch " +
+                       $"bufferFill={fill * 100f:F0}% callbacks={_diagCallbacks} underruns={_diagUnderruns} framesRecv={_diagFramesReceived} over={elapsed:F1}s");
+            _diagCallbacks = _diagUnderruns = _diagFramesReceived = 0;
+            _diagWindowStartTicks = timestamp;
+        }
+
         // For testing and debugging
         internal float GetBufferFill()
         {
diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs
index 7f5c3d7c..c85e5a2d 100644
--- a/Runtime/Scripts/RtcAudioSource.cs
+++ b/Runtime/Scripts/RtcAudioSource.cs
@@ -83,6 +83,15 @@ private sealed class PendingAudioFrame
         private volatile bool _disposed = false;
         private int _audioReadCount = 0;
 
+        // --- Temporary capture-rate diagnostics (Info level, emitted ~every 2s) ---
+        // Measures the effective sample rate from wall-clock time vs the rate we declared to the
+        // native source. A measured rate that differs from the declared rate means the format
+        // label on the frames is wrong (audio would sound fast/slow/choppy on the receiver).
+        private long _diagWindowStartTicks; // 0 = not started
+        private long _diagSamplesPerChannel;
+        private int _diagAcceptedFrames;
+        private int _diagDroppedFrames;
+
         // Device-capture sources (microphone, AudioSource taps) don't know their format ahead of
         // time — it is whatever Unity's audio graph delivers. They use this constructor, which
         // configures the native source from Unity's current output configuration.
@@ -209,6 +218,9 @@ private void OnAudioRead(float[] data, int channels, int sampleRate)
                 return;
             }
 
+            var willDrop = (uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels;
+            RecordCaptureDiagnostics(data.Length / channels, channels, sampleRate, willDrop);
+
             // The native source rejects frames whose rate/channels differ from how it was
             // configured (it does not resample). This should not happen now that the source is
             // configured from the device, but if Unity reports an inconsistent format — or the
@@ -405,6 +417,28 @@ private static double ElapsedMilliseconds(long startedTimestamp)
             return (Stopwatch.GetTimestamp() - startedTimestamp) * 1000.0 / Stopwatch.Frequency;
         }
 
+        // Temporary diagnostic: tallies samples per channel and accepted/dropped frames, and once
+        // at least 2s of wall-clock time have passed logs the measured sample rate next to the
+        // rate declared to the native source, then starts a new window. Called from OnAudioRead.
+        private void RecordCaptureDiagnostics(int samplesPerChannel, int channels, int sampleRate, bool dropped)
+        {
+            _diagSamplesPerChannel += samplesPerChannel;
+            if (dropped) _diagDroppedFrames++; else _diagAcceptedFrames++;
+
+            var timestamp = Stopwatch.GetTimestamp();
+            if (_diagWindowStartTicks == 0)
+                _diagWindowStartTicks = timestamp;
+            var elapsed = (timestamp - _diagWindowStartTicks) / (double)Stopwatch.Frequency;
+            if (elapsed < 2.0)
+                return;
+
+            var measuredRate = _diagSamplesPerChannel / elapsed;
+            Utils.Info($"{DebugTag} capture diag: declared={_expectedSampleRate}Hz/{_expectedChannels}ch measuredRate={measuredRate:F0}Hz " +
+                       $"lastFrame={samplesPerChannel}smp/{channels}ch/{sampleRate}Hz accepted={_diagAcceptedFrames} dropped={_diagDroppedFrames} over={elapsed:F1}s");
+            _diagSamplesPerChannel = _diagAcceptedFrames = _diagDroppedFrames = 0;
+            _diagWindowStartTicks = timestamp;
+        }
+
         private string DebugTag => $"RtcAudioSource#{_debugId}";
     }
 }

From a775e593e899b0f9d6bee902ab7876138160f4bf Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Fri, 12 Jun 2026 11:07:23 +0200
Subject: [PATCH 3/3] Open microphone at the output sample rate to avoid capture drift

MicrophoneSource started the device at the hardcoded
DefaultMicrophoneSampleRate and played the looping clip through an
AudioSource read on the DSP thread. When the device's actual rate differs
from the engine output rate, the clip fills and plays back at different
rates, so the read position drifts against the write position and the
captured audio becomes choppy.

Open the microphone at AudioSettings.outputSampleRate when the device
supports it (clamped to the device's reported caps; falling back to the
default when the output rate is unknown), so capture and playback run at
the same rate. This also aligns the mic rate with the native source rate,
which is taken from the same output configuration.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 Runtime/Scripts/MicrophoneSource.cs | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs
index a8775568..9a4405ac 100644
--- a/Runtime/Scripts/MicrophoneSource.cs
+++ b/Runtime/Scripts/MicrophoneSource.cs
@@ -59,6 +59,28 @@ public override void Start()
             _started = true;
         }
 
+        // Picks the frequency to open the microphone at: the engine's output sample rate, so the
+        // recorded clip and the AudioSource playing it back run at the same rate (a mismatch makes
+        // the looping clip drift against the read position and sound choppy). Uses
+        // DefaultMicrophoneSampleRate when the output rate is unknown. If the device reports a
+        // supported range, the rate is clamped to it and a warning is logged when that changes it.
+        private static int ResolveMicrophoneSampleRate(string deviceName)
+        {
+            int target = AudioSettings.outputSampleRate;
+            if (target <= 0)
+                target = (int)DefaultMicrophoneSampleRate;
+
+            Microphone.GetDeviceCaps(deviceName, out int minFreq, out int maxFreq);
+            // Unity reports (0, 0) when the device imposes no specific sample-rate range.
+            if (minFreq == 0 && maxFreq == 0)
+                return target;
+
+            var result = Mathf.Clamp(target, minFreq, maxFreq);
+            if (result != target)
+                Utils.Warning($"MicrophoneSource device='{deviceName}' cannot capture at {target}Hz (caps {minFreq}-{maxFreq}Hz), using {result}Hz");
+            return result;
+        }
+
         private IEnumerator StartMicrophone()
         {
             // Validate that the GameObject is still valid before starting
@@ -76,13 +98,14 @@ private IEnumerator StartMicrophone()
             }
 
             AudioClip clip = null;
+            var micFrequency = ResolveMicrophoneSampleRate(_deviceName);
             try
             {
                 clip = Microphone.Start(
                     _deviceName,
                     loop: true,
                     lengthSec: 1,
-                    frequency: (int)DefaultMicrophoneSampleRate
+                    frequency: micFrequency
                 );
             }
             catch (Exception e)
@@ -97,6 +120,8 @@ private IEnumerator StartMicrophone()
                 yield break;
             }
 
+            Utils.Info($"MicrophoneSource device='{_deviceName}' opened at {micFrequency}Hz (output={AudioSettings.outputSampleRate}Hz)");
+
             // Ensure no duplicate components exist before adding new ones.
             // This is important during app resume on iOS where components might not be
             // fully destroyed yet due to Unity's deferred Destroy().