diff --git a/Runtime/Scripts/AudioStream.cs b/Runtime/Scripts/AudioStream.cs index 4d9ab588..3288f839 100644 --- a/Runtime/Scripts/AudioStream.cs +++ b/Runtime/Scripts/AudioStream.cs @@ -50,6 +50,14 @@ public sealed class AudioStream : IDisposable private const int CrossfadeFrames = 128; // ~2.7ms @ 48kHz private int _skipCooldown = 0; + // --- Temporary receive diagnostics (Info level, emitted ~every 2s) --- + // Reveals whether choppiness is a buffer-starvation problem (underruns/low fill) versus a + // clean stream, and what rate/channels we are actually playing/requesting. + private long _diagWindowStartTicks; // 0 = window not started yet + private int _diagCallbacks; + private int _diagUnderruns; + private int _diagFramesReceived; + /// /// Creates a new audio stream from a remote audio track, attaching it to the /// given in the scene. @@ -147,6 +155,8 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) lock (_lock) { + MaybeLogReceiveDiagnostics(channels, sampleRate); + // Single gate covering first-create and runtime format changes (e.g. after a // system audio device switch). When the FFI stream is missing or what we asked // Rust for no longer matches what Unity is delivering, post a (re)create to the @@ -214,6 +224,7 @@ static float S16ToFloat(short v) if (valuesAvailableToRead < data.Length) { _isPrimed = false; + _diagUnderruns++; Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples but want to read {data.Length})"); // Output silence immediately instead of playing partial/choppy samples. @@ -370,6 +381,7 @@ private void OnAudioStreamEvent(AudioStreamEvent e) var data = new ReadOnlySpan(frame.Data.ToPointer(), frame.Length); _buffer.Write(data); } + _diagFramesReceived++; } } @@ -427,6 +439,25 @@ private void Dispose(bool disposing) Dispose(false); } + // Temporary diagnostic: ~every 2s logs buffer fill plus the window's underrun, callback and + // received-frame counts (zeroed after each report) so we can tell starvation (choppy) from a clean stream. 
Called under _lock. + private void MaybeLogReceiveDiagnostics(int channels, int sampleRate) + { + _diagCallbacks++; + var now = System.Diagnostics.Stopwatch.GetTimestamp(); + if (_diagWindowStartTicks == 0) _diagWindowStartTicks = now; + var elapsed = (now - _diagWindowStartTicks) / (double)System.Diagnostics.Stopwatch.Frequency; + if (elapsed < 2.0) return; + + float fill = _buffer != null ? _buffer.AvailableReadInPercent() : 0f; + Utils.Info($"AudioStream#{_trackHandleId} diag: out={sampleRate}Hz/{channels}ch ffi={_ffiSampleRate}Hz/{_ffiNumChannels}ch " + + $"bufferFill={fill * 100f:F0}% callbacks={_diagCallbacks} underruns={_diagUnderruns} framesRecv={_diagFramesReceived} over={elapsed:F1}s"); + _diagWindowStartTicks = now; + _diagCallbacks = 0; + _diagUnderruns = 0; + _diagFramesReceived = 0; + } + // For testing and debugging internal float GetBufferFill() { diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs index 3b63680b..8193090d 100644 --- a/Runtime/Scripts/BasicAudioSource.cs +++ b/Runtime/Scripts/BasicAudioSource.cs @@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource /// Creates a new basic audio source for the given in the scene. /// /// The to capture from. - /// The number of channels to capture. /// The type of audio source. - public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType) + /// + /// The sample rate and channel count are read from Unity's audio configuration once, when the source is constructed. 
+ /// + public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType) { _source = source; } diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index 904b8da7..9a4405ac 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ b/Runtime/Scripts/MicrophoneSource.cs @@ -28,7 +28,7 @@ sealed public class MicrophoneSource : RtcAudioSource /// get the list of available devices. /// The GameObject to attach the AudioSource to. The object must be kept in the scene /// for the duration of the source's lifetime. - public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone) + public MicrophoneSource(string deviceName, GameObject sourceObject) : base(RtcAudioSourceType.AudioSourceMicrophone) { _deviceName = deviceName; _sourceObject = sourceObject; @@ -59,6 +59,28 @@ public override void Start() _started = true; } + // Chooses the microphone rate: the engine's output sample rate when the device supports it, so + // the captured clip and the AudioSource that plays it back run at the same rate. A mismatch + // makes the looping clip drift against the playback read position and produces choppy audio. + // Falls back to DefaultMicrophoneSampleRate when the output rate is unknown, and clamps to + // the device's supported range when it reports one (only that path logs the result). + private static int ResolveMicrophoneSampleRate(string deviceName) + { + int target = AudioSettings.outputSampleRate; + if (target <= 0) + target = (int)DefaultMicrophoneSampleRate; + + Microphone.GetDeviceCaps(deviceName, out int minFreq, out int maxFreq); + // Unity reports (0, 0) when the device imposes no specific sample-rate range. 
+ if (minFreq == 0 && maxFreq == 0) + return target; + + var result = Mathf.Clamp(target, minFreq, maxFreq); + Utils.Info($"ResolveMicrophoneSampleRate: {result}"); + + return result; + } + private IEnumerator StartMicrophone() { // Validate that the GameObject is still valid before starting @@ -76,13 +98,14 @@ private IEnumerator StartMicrophone() } AudioClip clip = null; + var micFrequency = ResolveMicrophoneSampleRate(_deviceName); try { clip = Microphone.Start( _deviceName, loop: true, lengthSec: 1, - frequency: (int)DefaultMicrophoneSampleRate + frequency: micFrequency ); } catch (Exception e) @@ -97,6 +120,8 @@ private IEnumerator StartMicrophone() yield break; } + Utils.Info($"MicrophoneSource device='{_deviceName}' opened at {micFrequency}Hz (output={AudioSettings.outputSampleRate}Hz)"); + // Ensure no duplicate components exist before adding new ones. // This is important during app resume on iOS where components might not be // fully destroyed yet due to Unity's deferred Destroy(). diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index a9af8a0a..c85e5a2d 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -83,20 +83,42 @@ private sealed class PendingAudioFrame private volatile bool _disposed = false; private int _audioReadCount = 0; - protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom) + // --- Temporary capture-rate diagnostics (Info level, emitted ~every 2s) --- + // Measures the effective sample rate from wall-clock time vs the rate we declared to the + // native source. A measured rate that differs from the declared rate means the format + // label on the frames is wrong (audio would sound fast/slow/choppy on the receiver). 
+ private long _diagWindowStartTicks; // 0 = not started + private long _diagSamplesPerChannel; + private int _diagAcceptedFrames; + private int _diagDroppedFrames; + + // Device-capture sources (microphone, AudioSource taps) don't know their format ahead of + // time — it is whatever Unity's audio graph delivers. They use this constructor, which + // configures the native source from Unity's current output configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType) + : this(audioSourceType, 0, 0) { } + + // Sources that generate a fixed, known format (e.g. test signal generators) declare it + // directly. If either value is 0, BOTH are taken from the device configuration instead. + protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels) { _sourceType = audioSourceType; - _expectedChannels = (uint)channels; + + if (sampleRate > 0 && channels > 0) + { + _expectedSampleRate = sampleRate; + _expectedChannels = channels; + } + else + { + (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat(); + } using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; - newAudioSource.NumChannels = (uint)channels; - newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ? 
- DefaultMicrophoneSampleRate : DefaultSampleRate; - _expectedSampleRate = newAudioSource.SampleRate; - - Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}"); + newAudioSource.NumChannels = _expectedChannels; + newAudioSource.SampleRate = _expectedSampleRate; newAudioSource.Options = request.TempResource(); newAudioSource.Options.EchoCancellation = true; @@ -109,6 +131,49 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); } + // Reads Unity's actual output audio configuration. The capture path delivers buffers at the + // DSP output rate/channel count (see AudioProbe), so this is the format the native source + // must match. Falls back to the platform defaults when Unity cannot report a configuration + // (e.g. batch mode without an audio device); rate and channel count fall back independently. + private (uint sampleRate, uint channels) ResolveDeviceFormat() + { + uint sampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone + ? 
DefaultMicrophoneSampleRate + : DefaultSampleRate; + uint channels = DefaultChannels; + + try + { + var config = UnityEngine.AudioSettings.GetConfiguration(); + if (config.sampleRate > 0) + sampleRate = (uint)config.sampleRate; + var configuredChannels = SpeakerModeChannels(config.speakerMode); + if (configuredChannels > 0) + channels = configuredChannels; + } + catch (Exception e) + { + Utils.Warning($"{DebugTag} could not read Unity audio configuration, using defaults: {e.Message}"); + } + + return (sampleRate, channels); + } + + private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode) + { + switch (mode) + { + case UnityEngine.AudioSpeakerMode.Mono: return 1; + case UnityEngine.AudioSpeakerMode.Stereo: return 2; + case UnityEngine.AudioSpeakerMode.Quad: return 4; + case UnityEngine.AudioSpeakerMode.Surround: return 5; + case UnityEngine.AudioSpeakerMode.Mode5point1: return 6; + case UnityEngine.AudioSpeakerMode.Mode7point1: return 8; + case UnityEngine.AudioSpeakerMode.Prologic: return 2; + default: return 0; + } + } + /// /// Begin capturing audio samples from the underlying source. /// @@ -153,9 +218,19 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) return; } + var willDrop = (uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels; + RecordCaptureDiagnostics(data.Length / channels, channels, sampleRate, willDrop); + + // The native source rejects frames whose rate/channels differ from how it was + // configured (it does not resample). This should not happen now that the source is + // configured from the device, but if Unity reports an inconsistent format — or the + // output configuration changes at runtime — we drop the frame instead of sending a + // mismatch the native side would error on. 
if ((uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels) { - Utils.Warning($"{DebugTag} audio frame #{frameIndex} metadata mismatch actualRate={sampleRate} actualChannels={channels} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); + if (frameIndex == 1 || frameIndex % 100 == 0) + Utils.Warning($"{DebugTag} dropping audio frame #{frameIndex}: format {sampleRate}/{channels} does not match source {_expectedSampleRate}/{_expectedChannels} (sourceType={_sourceType})"); + return; } var pendingBeforeSend = PendingFrameCount(); @@ -342,6 +417,28 @@ private static double ElapsedMilliseconds(long startedTimestamp) return (Stopwatch.GetTimestamp() - startedTimestamp) * 1000.0 / Stopwatch.Frequency; } + // Temporary diagnostic: accumulates captured audio over wall-clock time and, ~every 2s, + // logs the effective sample rate vs the rate declared to the native source. The first call only + // opens the window: its samples predate it and are not counted. Runs on the audio thread. 
+ private void RecordCaptureDiagnostics(int samplesPerChannel, int channels, int sampleRate, bool dropped) + { + var now = Stopwatch.GetTimestamp(); + if (dropped) _diagDroppedFrames++; else _diagAcceptedFrames++; + if (_diagWindowStartTicks == 0) { _diagWindowStartTicks = now; return; } + _diagSamplesPerChannel += samplesPerChannel; + + var elapsed = (now - _diagWindowStartTicks) / (double)Stopwatch.Frequency; + if (elapsed < 2.0) return; + + var measuredRate = _diagSamplesPerChannel / elapsed; + Utils.Info($"{DebugTag} capture diag: declared={_expectedSampleRate}Hz/{_expectedChannels}ch measuredRate={measuredRate:F0}Hz " + + $"lastFrame={samplesPerChannel}smp/{channels}ch/{sampleRate}Hz accepted={_diagAcceptedFrames} dropped={_diagDroppedFrames} over={elapsed:F1}s"); + _diagWindowStartTicks = now; + _diagSamplesPerChannel = 0; + _diagAcceptedFrames = 0; + _diagDroppedFrames = 0; + } + private string DebugTag => $"RtcAudioSource#{_debugId}"; } } diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs index 225c7a0c..97b2cb70 100644 --- a/Samples~/Meet/Assets/Runtime/MeetManager.cs +++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs @@ -453,8 +453,7 @@ private IEnumerator PublishLocalMicrophone() { if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break; - Microphone.Start(null, true, 10, 44100); - + // MicrophoneSource starts the device itself, so we only need the device name here. 
var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}"); audioObject.transform.SetParent(_audioTrackParent); diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs index 907e9ccc..2337615b 100644 --- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs +++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs @@ -31,7 +31,7 @@ public SineWaveAudioSource( int sampleRate = 48000, double frequencyHz = 440.0, float amplitude = 0.1f) - : base(channels, RtcAudioSourceType.AudioSourceCustom) + : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels) { _channels = channels; _sampleRate = sampleRate;