From 339e6c2694034b9eac285cab9c54c3e554b7c048 Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Thu, 11 Jun 2026 15:03:53 +0200
Subject: [PATCH 1/2] Match native audio source format to device, recreate on mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The native (Rust) audio source was created with a hardcoded sample rate
(48000) and channel count (2). Microphone frames, however, arrive in
Unity's actual DSP output format (sample rate and channel count), which
can differ — most notably when a Bluetooth headset connects and switches
the output rate. The Rust native source does not resample; it rejects
frames whose rate/channels don't match its configuration with
"sample_rate and num_channels don't match", producing the
metadata-mismatch warning and capture failures.

RtcAudioSource now initializes the native source from Unity's real output
configuration (AudioSettings.GetConfiguration) instead of hardcoded
defaults, and adds a runtime safety net: when a captured frame's format
does not match the live source, the frame is dropped and the native
source is recreated to match (coalesced and marshaled to the main
thread).

Because a track is bound to a specific source handle at creation and
cannot follow a new one in place, LocalAudioTrack listens for the
recreation and transparently rebuilds and republishes itself onto the new
handle, keeping the same instance so callers' references stay valid.

Sources that know their exact format (SineWaveAudioSource) pass it
explicitly to keep behavior deterministic. Also removes the redundant
Microphone.Start in the Meet sample.

Co-Authored-By: Claude Opus 4.8 (1M context) --- Runtime/Scripts/Participant.cs | 5 + Runtime/Scripts/RtcAudioSource.cs | 158 ++++++++++++++++++-- Runtime/Scripts/Track.cs | 54 ++++++- Samples~/Meet/Assets/Runtime/MeetManager.cs | 4 +- Tests/PlayMode/Utils/SineWaveAudioSource.cs | 2 +- 5 files changed, 202 insertions(+), 21 deletions(-) diff --git a/Runtime/Scripts/Participant.cs b/Runtime/Scripts/Participant.cs index 4da028c0..ee23c58a 100644 --- a/Runtime/Scripts/Participant.cs +++ b/Runtime/Scripts/Participant.cs @@ -77,6 +77,11 @@ public PublishTrackInstruction PublishTrack(ILocalTrack localTrack, TrackPublish if (!Room.TryGetTarget(out var room)) throw new Exception("room is invalid"); + // Remember the publish target so an audio track can transparently republish itself if + // its source recreates its native handle (e.g. on a sample-rate change). + if (localTrack is LocalAudioTrack audioTrack) + audioTrack.RememberPublishTarget(this, options); + var track = (Track)localTrack; using var request = FFIBridge.Instance.NewRequest(); diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index dcbfb58f..62c4d493 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -60,10 +60,26 @@ private sealed class PendingAudioFrame private readonly RtcAudioSourceType _sourceType; public RtcAudioSourceType SourceType => _sourceType; private readonly int _debugId = Interlocked.Increment(ref nextDebugId); - private readonly uint _expectedSampleRate; - private readonly uint _expectedChannels; - internal readonly FfiHandle Handle; + // Format of the live native source. Written only on the main thread (constructor and + // recreation); read on the audio thread. The writers publish their changes through the + // volatile _handleReady flag (set last in CreateNativeSource). 
+ private volatile uint _liveSampleRate; + private volatile uint _liveChannels; + private volatile bool _handleReady; + + // Coalesces recreation requests raised from the audio thread and marshaled to the main thread. + private readonly object _recreateLock = new object(); + private bool _recreateScheduled; + private uint _desiredSampleRate; + private uint _desiredChannels; + + // Raised on the main thread after the native source is recreated at runtime (not on the + // initial creation). LocalAudioTrack subscribes to rebuild and republish its FFI track, + // since a track is bound to a specific native source handle at creation time. + internal event Action NativeSourceChanged; + + internal FfiHandle Handle { get; private set; } protected AudioSourceInfo _info; // CaptureAudioFrame is asynchronous: the native side can continue reading from the PCM @@ -83,20 +99,37 @@ private sealed class PendingAudioFrame private volatile bool _disposed = false; private int _audioReadCount = 0; - protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom) + protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom, uint sampleRate = 0) { _sourceType = audioSourceType; - _expectedChannels = (uint)channels; + // Sources that know their exact format (e.g. test signal generators) pass it + // explicitly. Capture sources that flow through Unity's audio graph leave sampleRate + // at 0 so we read the device's actual output configuration instead of hardcoding it. 
+ uint initialRate; + uint initialChannels; + if (sampleRate > 0) + { + initialRate = sampleRate; + initialChannels = (uint)channels; + } + else + { + (initialRate, initialChannels) = ResolveDeviceFormat((uint)channels); + } + + CreateNativeSource(initialRate, initialChannels); + Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} rate={_liveSampleRate} channels={_liveChannels} sourceType={_sourceType}"); + } + + // Builds (or rebuilds) the underlying native audio source. Main thread only. + private void CreateNativeSource(uint sampleRate, uint channels) + { using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; - newAudioSource.NumChannels = (uint)channels; - newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ? - DefaultMicrophoneSampleRate : DefaultSampleRate; - _expectedSampleRate = newAudioSource.SampleRate; - - UnityEngine.Debug.Log($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}"); + newAudioSource.NumChannels = channels; + newAudioSource.SampleRate = sampleRate; newAudioSource.Options = request.TempResource(); newAudioSource.Options.EchoCancellation = true; @@ -106,7 +139,98 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = FfiResponse res = response; _info = res.NewAudioSource.Source.Info; Handle = FfiHandle.FromOwnedHandle(res.NewAudioSource.Source.Handle); - Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); + + _liveSampleRate = sampleRate; + _liveChannels = channels; + _handleReady = true; // volatile release: publishes Handle/_info/_live* to the audio thread + } + + // Reads Unity's actual output audio configuration. 
The capture path delivers buffers at + // the DSP output rate/channel count (see AudioProbe), so this is the format the native + // source must match. Falls back to the platform defaults when Unity cannot report a + // configuration (e.g. batch mode without an audio device). + private (uint sampleRate, uint channels) ResolveDeviceFormat(uint channelHint) + { + uint sampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone + ? DefaultMicrophoneSampleRate + : DefaultSampleRate; + uint channels = channelHint; + + try + { + var config = UnityEngine.AudioSettings.GetConfiguration(); + if (config.sampleRate > 0) + sampleRate = (uint)config.sampleRate; + var configuredChannels = SpeakerModeChannels(config.speakerMode); + if (configuredChannels > 0) + channels = configuredChannels; + } + catch (Exception e) + { + Utils.Warning($"{DebugTag} could not read Unity audio configuration, using defaults: {e.Message}"); + } + + return (sampleRate, channels); + } + + private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode) + { + switch (mode) + { + case UnityEngine.AudioSpeakerMode.Mono: return 1; + case UnityEngine.AudioSpeakerMode.Stereo: return 2; + case UnityEngine.AudioSpeakerMode.Quad: return 4; + case UnityEngine.AudioSpeakerMode.Surround: return 5; + case UnityEngine.AudioSpeakerMode.Mode5point1: return 6; + case UnityEngine.AudioSpeakerMode.Mode7point1: return 8; + case UnityEngine.AudioSpeakerMode.Prologic: return 2; + default: return 0; + } + } + + // Called from the audio thread when an incoming frame's format does not match the live + // native source. Coalesces requests and marshals the rebuild to the main thread, because + // creating the native source and rebuilding the track touch FFI/Unity APIs that are not + // safe to call from the audio thread. 
+ private void RequestNativeSource(uint sampleRate, uint channels) + { + lock (_recreateLock) + { + _desiredSampleRate = sampleRate; + _desiredChannels = channels; + if (_recreateScheduled) return; + _recreateScheduled = true; + } + + var context = FfiClient.Instance._context; + if (context != null) + context.Post(_ => ApplyRecreate(), null); + else + ApplyRecreate(); + } + + private void ApplyRecreate() + { + uint sampleRate; + uint channels; + lock (_recreateLock) + { + _recreateScheduled = false; + sampleRate = _desiredSampleRate; + channels = _desiredChannels; + } + + if (_disposed) return; + if (_handleReady && sampleRate == _liveSampleRate && channels == _liveChannels) + return; // configuration already settled on the desired format + + Utils.Debug($"{DebugTag} recreating native source rate {_liveSampleRate}->{sampleRate} channels {_liveChannels}->{channels} sourceType={_sourceType}"); + + var previous = Handle; + _handleReady = false; // drop audio-thread frames until the new source is live + CreateNativeSource(sampleRate, channels); + NativeSourceChanged?.Invoke(); // let the track rebuild/republish onto the new handle + previous?.Dispose(); } /// @@ -153,9 +277,15 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) return; } - if ((uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels) + // The native source rejects frames whose rate/channels differ from how it was + // configured (the Rust source does not resample). Unity's reported configuration is + // not always accurate and can change at runtime (e.g. when a Bluetooth headset + // connects), so trust the frame: if it does not match the live source, drop it and + // (re)create the native source to match. 
+ if (!_handleReady || (uint)sampleRate != _liveSampleRate || (uint)channels != _liveChannels) { - Utils.Warning($"{DebugTag} audio frame #{frameIndex} metadata mismatch actualRate={sampleRate} actualChannels={channels} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); + RequestNativeSource((uint)sampleRate, (uint)channels); + return; } var pendingBeforeSend = PendingFrameCount(); diff --git a/Runtime/Scripts/Track.cs b/Runtime/Scripts/Track.cs index 419c25df..3e442322 100644 --- a/Runtime/Scripts/Track.cs +++ b/Runtime/Scripts/Track.cs @@ -85,7 +85,7 @@ public class Track : ITrack // IsOwned is true if C# owns the handle public bool IsOwned => Handle != null && !Handle.IsInvalid; - public readonly FfiHandle Handle; + public FfiHandle Handle { get; private set; } FfiHandle ITrack.TrackHandle => Handle; @@ -104,6 +104,17 @@ internal void UpdateInfo(TrackInfo info) _info = info; } + // Replaces the underlying FFI track handle. Used when a local track is rebuilt because its + // audio source recreated its native handle at a new sample rate/channel count. Disposes + // the previous handle. 
+ internal void SwapHandle(OwnedTrack track) + { + var previous = Handle; + Handle = FfiHandle.FromOwnedHandle(track.Handle); + UpdateInfo(track.Info); + previous?.Dispose(); + } + internal void UpdateMuted(bool muted) { _info.Muted = muted; @@ -118,6 +129,9 @@ internal void DisposeHandles() public sealed class LocalAudioTrack : Track, ILocalTrack, IAudioTrack { RtcAudioSource _source; + string _name; + LocalParticipant _participant; + TrackPublishOptions _publishOptions; IRtcSource ILocalTrack.source { get => _source; } @@ -126,6 +140,17 @@ internal LocalAudioTrack(OwnedTrack track, Room room, RtcAudioSource source) : b } public static LocalAudioTrack CreateAudioTrack(string name, RtcAudioSource source, Room room) + { + var track = new LocalAudioTrack(CreateFfiTrack(name, source), room, source); + track._name = name; + // The track is bound to a specific native source handle at creation time and cannot + // follow a new one in place. If the source recreates its native handle at runtime + // (e.g. on a sample-rate change), rebuild and republish the track onto the new handle. + source.NativeSourceChanged += track.OnNativeSourceChanged; + return track; + } + + private static OwnedTrack CreateFfiTrack(string name, RtcAudioSource source) { using var request = FFIBridge.Instance.NewRequest(); var createTrack = request.request; @@ -134,9 +159,30 @@ public static LocalAudioTrack CreateAudioTrack(string name, RtcAudioSource sourc using var resp = request.Send(); FfiResponse res = resp; - var trackInfo = res.CreateAudioTrack.Track; - var track = new LocalAudioTrack(trackInfo, room, source); - return track; + return res.CreateAudioTrack.Track; + } + + // Records the publish target so the track can republish itself after a source recreation. 
+ internal void RememberPublishTarget(LocalParticipant participant, TrackPublishOptions options) + { + _participant = participant; + _publishOptions = options; + } + + // Runs on the main thread after the source recreated its native handle. Rebuilds the FFI + // track onto the new source and, if the track was already published, republishes it. + private void OnNativeSourceChanged() + { + var wasPublished = _participant != null && !string.IsNullOrEmpty(Sid); + + // Unpublish first (reads the current Sid) before swapping to the new handle. + if (wasPublished) + _participant.UnpublishTrack(this, false); + + SwapHandle(CreateFfiTrack(_name, _source)); + + if (wasPublished) + _participant.PublishTrack(this, _publishOptions); } } diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs index 225c7a0c..6bc31f0e 100644 --- a/Samples~/Meet/Assets/Runtime/MeetManager.cs +++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs @@ -453,8 +453,8 @@ private IEnumerator PublishLocalMicrophone() { if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break; - Microphone.Start(null, true, 10, 44100); - + // MicrophoneSource starts the device itself (at the resolved sample rate), so we only + // need the device name here. 
var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}"); audioObject.transform.SetParent(_audioTrackParent); diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs index 907e9ccc..bac6bb11 100644 --- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs +++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs @@ -31,7 +31,7 @@ public SineWaveAudioSource( int sampleRate = 48000, double frequencyHz = 440.0, float amplitude = 0.1f) - : base(channels, RtcAudioSourceType.AudioSourceCustom) + : base(channels, RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate) { _channels = channels; _sampleRate = sampleRate; From 9965cadaa81630fb76a7bfc1fadb7a82d2ebedc6 Mon Sep 17 00:00:00 2001 From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com> Date: Thu, 11 Jun 2026 15:23:39 +0200 Subject: [PATCH 2/2] Derive channel count from device config, drop hardcoded hint Replace the base RtcAudioSource constructor's `int channels = 2` hint with two explicit constructors: a device-capture one that takes only the source type and reads both sample rate and channel count from Unity's audio configuration (falling back to the platform defaults), and an explicit one for sources that generate a known fixed format. Either way the format is corrected from the first captured frame, so the initial values are just a starting point. MicrophoneSource and BasicAudioSource now use device mode (no channel hint); BasicAudioSource drops its unused `channels` parameter. SineWaveAudioSource declares its exact (sampleRate, channels). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Runtime/Scripts/BasicAudioSource.cs | 7 +++-- Runtime/Scripts/MicrophoneSource.cs | 2 +- Runtime/Scripts/RtcAudioSource.cs | 29 +++++++++++++-------- Tests/PlayMode/Utils/SineWaveAudioSource.cs | 2 +- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs index 3b63680b..ebe1a713 100644 --- a/Runtime/Scripts/BasicAudioSource.cs +++ b/Runtime/Scripts/BasicAudioSource.cs @@ -19,9 +19,12 @@ sealed public class BasicAudioSource : RtcAudioSource /// Creates a new basic audio source for the given in the scene. /// /// The to capture from. - /// The number of channels to capture. /// The type of audio source. - public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType) + /// + /// The sample rate and channel count are taken from Unity's audio configuration and + /// adjusted automatically to match the captured audio. + /// + public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType) { _source = source; } diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index 904b8da7..a8775568 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ b/Runtime/Scripts/MicrophoneSource.cs @@ -28,7 +28,7 @@ sealed public class MicrophoneSource : RtcAudioSource /// get the list of available devices. /// The GameObject to attach the AudioSource to. The object must be kept in the scene /// for the duration of the source's lifetime. 
- public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone) + public MicrophoneSource(string deviceName, GameObject sourceObject) : base(RtcAudioSourceType.AudioSourceMicrophone) { _deviceName = deviceName; _sourceObject = sourceObject; diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index 62c4d493..ac178c32 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -99,23 +99,29 @@ private sealed class PendingAudioFrame private volatile bool _disposed = false; private int _audioReadCount = 0; - protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom, uint sampleRate = 0) + // Device-capture sources (microphone, AudioSource taps) don't know their format ahead of + // time — it is whatever Unity's audio graph delivers. They use this constructor, which + // reads the device's output configuration up front and then corrects itself from the first + // captured frame (see OnAudioRead). + protected RtcAudioSource(RtcAudioSourceType audioSourceType) + : this(audioSourceType, 0, 0) { } + + // Sources that generate a fixed, known format (e.g. test signal generators) declare it + // directly. Passing 0 for either value falls back to the device configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels) { _sourceType = audioSourceType; - // Sources that know their exact format (e.g. test signal generators) pass it - // explicitly. Capture sources that flow through Unity's audio graph leave sampleRate - // at 0 so we read the device's actual output configuration instead of hardcoding it. 
uint initialRate; uint initialChannels; - if (sampleRate > 0) + if (sampleRate > 0 && channels > 0) { initialRate = sampleRate; - initialChannels = (uint)channels; + initialChannels = channels; } else { - (initialRate, initialChannels) = ResolveDeviceFormat((uint)channels); + (initialRate, initialChannels) = ResolveDeviceFormat(); } CreateNativeSource(initialRate, initialChannels); @@ -147,14 +153,15 @@ private void CreateNativeSource(uint sampleRate, uint channels) // Reads Unity's actual output audio configuration. The capture path delivers buffers at // the DSP output rate/channel count (see AudioProbe), so this is the format the native - // source must match. Falls back to the platform defaults when Unity cannot report a - // configuration (e.g. batch mode without an audio device). - private (uint sampleRate, uint channels) ResolveDeviceFormat(uint channelHint) + // source must match. Both values are corrected from the first captured frame regardless, + // so this only needs to provide a reasonable starting point; it falls back to the platform + // defaults when Unity cannot report a configuration (e.g. batch mode without an audio device). + private (uint sampleRate, uint channels) ResolveDeviceFormat() { uint sampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ? DefaultMicrophoneSampleRate : DefaultSampleRate; - uint channels = channelHint; + uint channels = DefaultChannels; try { diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs index bac6bb11..2337615b 100644 --- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs +++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs @@ -31,7 +31,7 @@ public SineWaveAudioSource( int sampleRate = 48000, double frequencyHz = 440.0, float amplitude = 0.1f) - : base(channels, RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate) + : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels) { _channels = channels; _sampleRate = sampleRate;