diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
new file mode 100644
index 00000000..fb1a95a3
--- /dev/null
+++ b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
@@ -0,0 +1,30 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <!-- NOTE(review): XML tags were lost in extraction; reconstructed from the surviving
+       values (Exe / net9.0 / win-x64 / enable / enable) — confirm against the original patch. -->
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net9.0</TargetFramework>
+    <RuntimeIdentifier>win-x64</RuntimeIdentifier>
+    <Nullable>enable</Nullable>
+    <ImplicitUsings>enable</ImplicitUsings>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- NAudio provides the WaveInEvent microphone capture used by Program.cs -->
+    <PackageReference Include="NAudio" Version="2.2.1" />
+    <!-- Reference to the Foundry Local SDK project — path to be confirmed -->
+    <ProjectReference Include="..\..\..\sdk_v2\cs\src\Microsoft.AI.Foundry.Local.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/samples/cs/LiveAudioTranscription/Program.cs b/samples/cs/LiveAudioTranscription/Program.cs
new file mode 100644
index 00000000..c0ecee95
--- /dev/null
+++ b/samples/cs/LiveAudioTranscription/Program.cs
@@ -0,0 +1,169 @@
+// Live Audio Transcription — Foundry Local SDK Example
+//
+// Demonstrates real-time microphone-to-text using:
+// SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
+//
+// Prerequisites:
+// 1. Nemotron ASR model downloaded to a local cache folder
+// 2. Microsoft.AI.Foundry.Local.Core.dll (built from neutron-server with GenAI 0.13.0+)
+// 3. onnxruntime-genai.dll + onnxruntime.dll + onnxruntime_providers_shared.dll (native GenAI)
+//
+// Usage:
+// dotnet run -- [model-cache-dir]
+// dotnet run -- C:\path\to\models
+
+using Microsoft.AI.Foundry.Local;
+using Microsoft.Extensions.Logging;
+using NAudio.Wave;
+
+// Parse model cache directory from args or use default
+var modelCacheDir = args.Length > 0
+ ? args[0]
+ : Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
+ "FoundryLocal", "models");
+
+var coreDllPath = Path.Combine(AppContext.BaseDirectory, "Microsoft.AI.Foundry.Local.Core.dll");
+
+var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information));
+var logger = loggerFactory.CreateLogger("LiveAudioTranscription");
+
+Console.WriteLine("===========================================================");
+Console.WriteLine(" Foundry Local -- Live Audio Transcription Demo");
+Console.WriteLine("===========================================================");
+Console.WriteLine();
+Console.WriteLine($" Model cache: {modelCacheDir}");
+Console.WriteLine($" Core DLL: {coreDllPath} (exists: {File.Exists(coreDllPath)})");
+Console.WriteLine();
+
+try
+{
+ // === Step 1: Initialize Foundry Local SDK ===
+ Console.WriteLine("[1/5] Initializing Foundry Local SDK...");
+ var config = new Configuration
+ {
+ AppName = "LiveAudioTranscription",
+ LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
+ ModelCacheDir = modelCacheDir,
+ AdditionalSettings = new Dictionary<string, string>
+ {
+ { "FoundryLocalCorePath", coreDllPath }
+ }
+ };
+
+ await FoundryLocalManager.CreateAsync(config, logger);
+ Console.WriteLine(" SDK initialized.");
+
+ // === Step 2: Find and load the nemotron ASR model ===
+ Console.WriteLine("[2/5] Loading nemotron model...");
+ var catalog = await FoundryLocalManager.Instance.GetCatalogAsync();
+ var model = await catalog.GetModelAsync("nemotron");
+
+ if (model == null)
+ {
+ Console.WriteLine("ERROR: 'nemotron' not found in catalog.");
+ Console.WriteLine($" Ensure the model is downloaded to: {modelCacheDir}");
+ Console.WriteLine(" The folder should contain genai_config.json, encoder.onnx, decoder.onnx, etc.");
+ return;
+ }
+
+ Console.WriteLine($" Found model: {model.Alias}");
+ await model.LoadAsync();
+ Console.WriteLine(" Model loaded.");
+
+ // === Step 3: Create live transcription session ===
+ Console.WriteLine("[3/5] Creating live transcription session...");
+ var audioClient = await model.GetAudioClientAsync();
+ var session = audioClient.CreateLiveTranscriptionSession();
+ session.Settings.SampleRate = 16000;
+ session.Settings.Channels = 1;
+ session.Settings.Language = "en";
+
+ await session.StartAsync();
+ Console.WriteLine(" Session started (SDK -> Core -> GenAI pipeline active).");
+
+ // === Step 4: Set up microphone + transcription reader ===
+ Console.WriteLine("[4/5] Setting up microphone...");
+
+ // Background task reads transcription results as they arrive
+ var readTask = Task.Run(async () =>
+ {
+ try
+ {
+ await foreach (var result in session.GetTranscriptionStream())
+ {
+ if (result.IsFinal)
+ {
+ Console.WriteLine();
+ Console.WriteLine($" [FINAL] {result.Text}");
+ Console.Out.Flush();
+ }
+ else if (!string.IsNullOrEmpty(result.Text))
+ {
+ Console.ForegroundColor = ConsoleColor.Cyan;
+ Console.Write(result.Text);
+ Console.ResetColor();
+ Console.Out.Flush();
+ }
+ }
+ }
+ catch (OperationCanceledException) { }
+ });
+
+ // Microphone capture via NAudio
+ using var waveIn = new WaveInEvent
+ {
+ WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1),
+ BufferMilliseconds = 100
+ };
+
+ int totalChunks = 0;
+ long totalBytes = 0;
+
+ waveIn.DataAvailable += (sender, e) =>
+ {
+ if (e.BytesRecorded > 0)
+ {
+ _ = session.AppendAsync(new ReadOnlyMemory<byte>(e.Buffer, 0, e.BytesRecorded));
+ totalChunks++;
+ totalBytes += e.BytesRecorded;
+ }
+ };
+
+ // === Step 5: Record ===
+ Console.WriteLine();
+ Console.WriteLine("===========================================================");
+ Console.WriteLine(" LIVE TRANSCRIPTION ACTIVE");
+ Console.WriteLine(" Speak into your microphone.");
+ Console.WriteLine(" Transcription appears in real-time (cyan text).");
+ Console.WriteLine(" Press ENTER to stop recording.");
+ Console.WriteLine("===========================================================");
+ Console.WriteLine();
+
+ waveIn.StartRecording();
+ Console.ReadLine();
+ waveIn.StopRecording();
+
+ var totalSeconds = totalBytes / (16000.0 * 2);
+ Console.WriteLine($"\n Recording: {totalSeconds:F1}s | {totalChunks} chunks | {totalBytes / 1024} KB");
+
+ // Stop session (flushes remaining audio through the pipeline)
+ Console.WriteLine("\n[5/5] Stopping session...");
+ await session.StopAsync();
+ await readTask;
+
+ // Unload model
+ await model.UnloadAsync();
+
+ Console.WriteLine();
+ Console.WriteLine("===========================================================");
+ Console.WriteLine(" Demo complete!");
+ Console.WriteLine(" Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text");
+ Console.WriteLine("===========================================================");
+}
+catch (Exception ex)
+{
+ Console.WriteLine($"\nERROR: {ex.Message}");
+ if (ex.InnerException != null)
+ Console.WriteLine($"Inner: {ex.InnerException.Message}");
+ Console.WriteLine($"\n{ex.StackTrace}");
+}
diff --git a/samples/cs/LiveAudioTranscription/README.md b/samples/cs/LiveAudioTranscription/README.md
new file mode 100644
index 00000000..f4897524
--- /dev/null
+++ b/samples/cs/LiveAudioTranscription/README.md
@@ -0,0 +1,143 @@
+# Live Audio Transcription Demo
+
+Real-time microphone-to-text using Foundry Local SDK, Core, and onnxruntime-genai.
+
+## Architecture
+
+```
+Microphone (NAudio, 16kHz/16-bit/mono)
+ |
+ v
+Foundry Local SDK (C#)
+ | AppendAsync(pcmBytes)
+ v
+Foundry Local Core (NativeAOT DLL)
+ | AppendAudioChunk -> CommitTranscription
+ v
+onnxruntime-genai (StreamingProcessor + Generator)
+ | RNNT encoder + decoder
+ v
+Live transcription text
+```
+
+## Prerequisites
+
+1. **Windows x64** with a microphone
+2. **.NET 9.0 SDK** installed
+3. **Nemotron ASR model** downloaded locally
+4. **Native DLLs** (4 files — see Setup below)
+
+## Setup (Step by Step)
+
+### Step 1: Get the native DLLs
+
+You need 4 DLLs placed in this project folder:
+
+| DLL | Source |
+|-----|--------|
+| `Microsoft.AI.Foundry.Local.Core.dll` | Built from neutron-server (`dotnet publish` with NativeAOT) |
+| `onnxruntime-genai.dll` | Built from onnxruntime-genai (Nenad's StreamingProcessor branch) |
+| `onnxruntime.dll` | Comes with the Core publish output |
+| `onnxruntime_providers_shared.dll` | Comes with the Core publish output |
+
+**Option A: From CI artifacts**
+- Download the Core DLL from the neutron-server CI pipeline artifacts
+- Download the GenAI native DLLs from the onnxruntime-genai pipeline artifacts
+
+**Option B: From a teammate**
+- Ask for the 4 DLLs from someone who has already built them
+
+Copy all 4 DLLs to this folder (`samples/cs/LiveAudioTranscription/`).
+
+### Step 2: Get the Nemotron model
+
+The model should be in a folder with this structure:
+```
+models/
+ nemotron/
+ genai_config.json
+ encoder.onnx
+ decoder.onnx
+ joint.onnx
+ tokenizer.json
+ vocab.txt
+```
+
+### Step 3: Build
+
+```powershell
+cd samples/cs/LiveAudioTranscription
+dotnet build -c Debug
+```
+
+### Step 4: Copy native DLLs to output (if not auto-copied)
+
+```powershell
+Copy-Item onnxruntime-genai.dll bin\Debug\net9.0\win-x64\ -Force
+Copy-Item onnxruntime.dll bin\Debug\net9.0\win-x64\ -Force
+Copy-Item onnxruntime_providers_shared.dll bin\Debug\net9.0\win-x64\ -Force
+Copy-Item Microsoft.AI.Foundry.Local.Core.dll bin\Debug\net9.0\win-x64\ -Force
+```
+
+### Step 5: Run
+
+```powershell
+# Default model cache location
+dotnet run -c Debug --no-build
+
+# Or specify model cache directory
+dotnet run -c Debug --no-build -- C:\path\to\models
+```
+
+### Step 6: Speak!
+
+- The app will show `LIVE TRANSCRIPTION ACTIVE`
+- Speak into your microphone
+- Text appears in **cyan** as you speak
+- Press **ENTER** to stop
+
+## Expected Output
+
+```
+===========================================================
+ Foundry Local -- Live Audio Transcription Demo
+===========================================================
+
+[1/5] Initializing Foundry Local SDK...
+ SDK initialized.
+[2/5] Loading nemotron model...
+ Found model: nemotron
+ Model loaded.
+[3/5] Creating live transcription session...
+ Session started (SDK -> Core -> GenAI pipeline active).
+[4/5] Setting up microphone...
+
+===========================================================
+ LIVE TRANSCRIPTION ACTIVE
+ Speak into your microphone.
+ Transcription appears in real-time (cyan text).
+ Press ENTER to stop recording.
+===========================================================
+
+Hello this is a demo of live audio transcription running entirely on device
+ [FINAL] Hello this is a demo of live audio transcription running entirely on device
+
+ Recording: 15.2s | 152 chunks | 475 KB
+
+[5/5] Stopping session...
+
+===========================================================
+ Demo complete!
+ Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text
+===========================================================
+```
+
+## Troubleshooting
+
+| Error | Fix |
+|-------|-----|
+| `Core DLL not found` | Copy `Microsoft.AI.Foundry.Local.Core.dll` to project folder |
+| `nemotron not found in catalog` | Check `ModelCacheDir` points to folder containing `nemotron/` with `genai_config.json` |
+| `OgaStreamingProcessor not found` | The `onnxruntime-genai.dll` is old — rebuild from Nenad's branch or get from CI |
+| `No microphone` | Ensure a mic is connected and set as default recording device |
+| `num_mels unknown` | Fix `genai_config.json` — ASR params must be at model level, not nested under `speech` |
diff --git a/sdk_v2/cs/src/AssemblyInfo.cs b/sdk_v2/cs/src/AssemblyInfo.cs
index 9bebe71b..987f9de6 100644
--- a/sdk_v2/cs/src/AssemblyInfo.cs
+++ b/sdk_v2/cs/src/AssemblyInfo.cs
@@ -7,4 +7,5 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")]
+[assembly: InternalsVisibleTo("AudioStreamTest")]
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop
diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs
index 8411473b..c5eba7ec 100644
--- a/sdk_v2/cs/src/Detail/CoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/CoreInterop.cs
@@ -158,6 +158,31 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
nint callbackPtr, // NativeCallbackFn pointer
nint userData);
+ [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")]
+ [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+ private static unsafe partial void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
+ ResponseBuffer* nativeResponse);
+
+ // --- Audio streaming P/Invoke imports (kept for future dedicated entry points) ---
+
+ [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
+ [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+ private static unsafe partial void CoreAudioStreamStart(
+ RequestBuffer* request,
+ ResponseBuffer* response);
+
+ [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")]
+ [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+ private static unsafe partial void CoreAudioStreamPush(
+ StreamingRequestBuffer* request,
+ ResponseBuffer* response);
+
+ [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")]
+ [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+ private static unsafe partial void CoreAudioStreamStop(
+ RequestBuffer* request,
+ ResponseBuffer* response);
+
// helper to capture exceptions in callbacks
internal class CallbackHelper
{
@@ -331,4 +356,94 @@ public Task ExecuteCommandWithCallbackAsync(string commandName, CoreIn
return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct);
}
+ /// <summary>
+ /// Marshal a ResponseBuffer from unmanaged memory into a managed Response and free the unmanaged memory.
+ /// </summary>
+ private Response MarshalResponse(ResponseBuffer response)
+ {
+ Response result = new();
+
+ if (response.Data != IntPtr.Zero && response.DataLength > 0)
+ {
+ byte[] managedResponse = new byte[response.DataLength];
+ Marshal.Copy(response.Data, managedResponse, 0, response.DataLength);
+ result.Data = System.Text.Encoding.UTF8.GetString(managedResponse);
+ }
+
+ if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
+ {
+ result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
+ }
+
+ Marshal.FreeHGlobal(response.Data);
+ Marshal.FreeHGlobal(response.Error);
+
+ return result;
+ }
+
+ // --- Audio streaming managed implementations ---
+ // Route through the existing execute_command / execute_command_with_binary entry points.
+ // The Core handles audio_stream_start / audio_stream_stop as command cases in ExecuteCommandManaged,
+ // and audio_stream_push as a command case in ExecuteCommandWithBinaryManaged.
+
+ public Response StartAudioStream(CoreInteropRequest request)
+ {
+ return ExecuteCommand("audio_stream_start", request);
+ }
+
+ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
+ {
+ try
+ {
+ var commandInputJson = request.ToJson();
+ byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_push");
+ byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
+
+ IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length);
+ Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length);
+
+ IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
+ Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
+
+ // Pin the managed audio data so GC won't move it during the native call
+ using var audioHandle = audioData.Pin();
+
+ unsafe
+ {
+ var reqBuf = new StreamingRequestBuffer
+ {
+ Command = commandPtr,
+ CommandLength = commandBytes.Length,
+ Data = inputPtr,
+ DataLength = inputBytes.Length,
+ BinaryData = (nint)audioHandle.Pointer,
+ BinaryDataLength = audioData.Length
+ };
+
+ ResponseBuffer response = default;
+
+ try
+ {
+ CoreExecuteCommandWithBinary(&reqBuf, &response);
+ }
+ finally
+ {
+ Marshal.FreeHGlobal(commandPtr);
+ Marshal.FreeHGlobal(inputPtr);
+ }
+
+ return MarshalResponse(response);
+ }
+ }
+ catch (Exception ex) when (ex is not OperationCanceledException)
+ {
+ throw new FoundryLocalException("Error executing audio_stream_push", ex, _logger);
+ }
+ }
+
+ public Response StopAudioStream(CoreInteropRequest request)
+ {
+ return ExecuteCommand("audio_stream_stop", request);
+ }
+
}
diff --git a/sdk_v2/cs/src/Detail/ICoreInterop.cs b/sdk_v2/cs/src/Detail/ICoreInterop.cs
index 1fff9dde..b493dfb7 100644
--- a/sdk_v2/cs/src/Detail/ICoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/ICoreInterop.cs
@@ -51,4 +51,21 @@ Task ExecuteCommandAsync(string commandName, CoreInteropRequest? comma
Task ExecuteCommandWithCallbackAsync(string commandName, CoreInteropRequest? commandInput,
CallbackFn callback,
CancellationToken? ct = null);
+
+ // --- Audio streaming session support ---
+
+ [StructLayout(LayoutKind.Sequential)]
+ protected unsafe struct StreamingRequestBuffer
+ {
+ public nint Command;
+ public int CommandLength;
+ public nint Data; // JSON params
+ public int DataLength;
+ public nint BinaryData; // raw PCM audio bytes
+ public int BinaryDataLength;
+ }
+
+ Response StartAudioStream(CoreInteropRequest request);
+ Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData);
+ Response StopAudioStream(CoreInteropRequest request);
}
diff --git a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
index 894f9454..9ca3f539 100644
--- a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
+++ b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
@@ -33,6 +33,9 @@ namespace Microsoft.AI.Foundry.Local.Detail;
[JsonSerializable(typeof(IList))]
[JsonSerializable(typeof(PropertyDefinition))]
[JsonSerializable(typeof(IList))]
+// --- NEW: Audio streaming types ---
+[JsonSerializable(typeof(LiveAudioTranscriptionResult))]
+[JsonSerializable(typeof(CoreErrorResponse))]
[JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
WriteIndented = false)]
internal partial class JsonSerializationContext : JsonSerializerContext
diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
index 564858f3..e7529284 100644
--- a/sdk_v2/cs/src/OpenAI/AudioClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -8,7 +8,6 @@ namespace Microsoft.AI.Foundry.Local;
using System.Runtime.CompilerServices;
using System.Threading.Channels;
-
using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
@@ -46,6 +45,16 @@ public record AudioSettings
///
public AudioSettings Settings { get; } = new();
+ /// <summary>
+ /// Create a real-time streaming transcription session.
+ /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
+ /// </summary>
+ /// <returns>A streaming session that must be disposed when done.</returns>
+ public LiveAudioTranscriptionSession CreateLiveTranscriptionSession()
+ {
+ return new LiveAudioTranscriptionSession(_modelId);
+ }
+
///
/// Transcribe audio from a file.
///
@@ -63,28 +72,6 @@ public async Task TranscribeAudioAsync(string
.ConfigureAwait(false);
}
- ///
- /// Transcribe audio from a file with streamed output.
- ///
- ///
- /// Path to file containing audio recording.
- /// Supported formats: mp3
- ///
- /// Cancellation token.
- /// An asynchronous enumerable of transcription responses.
- public async IAsyncEnumerable TranscribeAudioStreamingAsync(
- string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
- {
- var enumerable = Utils.CallWithExceptionHandling(
- () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
- "Error during streaming audio transcription.", _logger).ConfigureAwait(false);
-
- await foreach (var item in enumerable)
- {
- yield return item;
- }
- }
-
private async Task TranscribeAudioImplAsync(string audioFilePath,
CancellationToken? ct)
{
@@ -108,6 +95,7 @@ private async Task TranscribeAudioImplAsync(st
return output;
}
+
private async IAsyncEnumerable TranscribeAudioStreamingImplAsync(
string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
{
diff --git a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
new file mode 100644
index 00000000..0c9e6477
--- /dev/null
+++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
@@ -0,0 +1,405 @@
+// --------------------------------------------------------------------------------------------------------------------
+//
+// Copyright (c) Microsoft. All rights reserved.
+//
+// --------------------------------------------------------------------------------------------------------------------
+
+namespace Microsoft.AI.Foundry.Local;
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Globalization;
+using System.Threading.Channels;
+using Microsoft.AI.Foundry.Local.Detail;
+using Microsoft.Extensions.Logging;
+
+
+/// <summary>
+/// Session for real-time audio streaming ASR (Automatic Speech Recognition).
+/// Audio data from a microphone (or other source) is pushed in as PCM chunks,
+/// and transcription results are returned as an async stream.
+/// <para>
+/// Created via <see cref="AudioClient.CreateLiveTranscriptionSession"/>.
+/// </para>
+/// <para>
+/// Thread safety: <see cref="AppendAsync"/> can be called from any thread (including high-frequency
+/// audio callbacks). Pushes are internally serialized via a bounded channel to prevent
+/// unbounded memory growth and ensure ordering.
+/// </para>
+/// </summary>
+
+
+public sealed class LiveAudioTranscriptionSession : IAsyncDisposable
+{
+ private readonly string _modelId;
+ private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop;
+ private readonly ILogger _logger = FoundryLocalManager.Instance.Logger;
+
+ // Session state — protected by _lock
+ private readonly AsyncLock _lock = new();
+ private string? _sessionHandle;
+ private bool _started;
+ private bool _stopped;
+
+ // Output channel: native callback writes, user reads via GetTranscriptionStream
+ private Channel<LiveAudioTranscriptionResult>? _outputChannel;
+
+ // Internal push queue: user writes audio chunks, background loop drains to native core.
+ // Bounded to prevent unbounded memory growth if native core is slower than real-time.
+ private Channel<ReadOnlyMemory<byte>>? _pushChannel;
+ private Task? _pushLoopTask;
+
+ // Dedicated CTS for the push loop — decoupled from StartAsync's caller token.
+ // Cancelled only during StopAsync/DisposeAsync to allow clean drain.
+ private CancellationTokenSource? _sessionCts;
+
+ // Snapshot of settings captured at StartAsync — prevents mutation after session starts.
+ private LiveAudioTranscriptionOptions? _activeSettings;
+
+ /// <summary>
+ /// Audio format settings for the streaming session.
+ /// Must be configured before calling <see cref="StartAsync"/>.
+ /// Settings are frozen once the session starts.
+ /// </summary>
+ public record LiveAudioTranscriptionOptions
+ {
+ /// PCM sample rate in Hz. Default: 16000.
+ public int SampleRate { get; set; } = 16000;
+
+ /// Number of audio channels. Default: 1 (mono).
+ public int Channels { get; set; } = 1;
+
+ /// Optional BCP-47 language hint (e.g., "en", "zh").
+ public string? Language { get; set; }
+
+ /// <summary>
+ /// Maximum number of audio chunks buffered in the internal push queue.
+ /// If the queue is full, AppendAsync will asynchronously wait.
+ /// Default: 100 (~3 seconds of audio at typical chunk sizes).
+ /// </summary>
+ public int PushQueueCapacity { get; set; } = 100;
+
+ internal LiveAudioTranscriptionOptions Snapshot() => this with { }; // record copy
+ }
+
+ public LiveAudioTranscriptionOptions Settings { get; } = new();
+
+ internal LiveAudioTranscriptionSession(string modelId)
+ {
+ _modelId = modelId;
+ }
+
+ /// <summary>
+ /// Start a real-time audio streaming session.
+ /// Must be called before <see cref="AppendAsync"/> or <see cref="GetTranscriptionStream"/>.
+ /// Settings are frozen after this call.
+ /// </summary>
+ /// <param name="ct">Cancellation token.</param>
+ public async Task StartAsync(CancellationToken ct = default)
+ {
+ using var disposable = await _lock.LockAsync().ConfigureAwait(false);
+
+ if (_started)
+ {
+ throw new FoundryLocalException("Streaming session already started. Call StopAsync first.");
+ }
+
+ // Freeze settings
+ _activeSettings = Settings.Snapshot();
+
+ _outputChannel = Channel.CreateUnbounded<LiveAudioTranscriptionResult>(
+ new UnboundedChannelOptions
+ {
+ SingleWriter = true, // only the native callback writes
+ SingleReader = true,
+ AllowSynchronousContinuations = true
+ });
+
+ _pushChannel = Channel.CreateBounded<ReadOnlyMemory<byte>>(
+ new BoundedChannelOptions(_activeSettings.PushQueueCapacity)
+ {
+ SingleReader = true, // only the push loop reads
+ SingleWriter = false, // multiple threads may push audio data
+ FullMode = BoundedChannelFullMode.Wait
+ });
+
+ var request = new CoreInteropRequest
+ {
+ Params = new Dictionary<string, string>
+ {
+ { "Model", _modelId },
+ { "SampleRate", _activeSettings.SampleRate.ToString(CultureInfo.InvariantCulture) },
+ { "Channels", _activeSettings.Channels.ToString(CultureInfo.InvariantCulture) },
+ }
+ };
+
+ if (_activeSettings.Language != null)
+ {
+ request.Params["Language"] = _activeSettings.Language;
+ }
+
+ // StartAudioStream uses existing execute_command entry point — synchronous P/Invoke
+ var response = await Task.Run(
+ () => _coreInterop.StartAudioStream(request), ct)
+ .ConfigureAwait(false);
+
+ if (response.Error != null)
+ {
+ _outputChannel.Writer.TryComplete();
+ throw new FoundryLocalException(
+ $"Error starting audio stream session: {response.Error}", _logger);
+ }
+
+ _sessionHandle = response.Data
+ ?? throw new FoundryLocalException("Native core did not return a session handle.", _logger);
+ _started = true;
+ _stopped = false;
+
+ // Use a dedicated CTS for the push loop — NOT the caller's ct.
+#pragma warning disable IDISP003 // Dispose previous before re-assigning
+ _sessionCts = new CancellationTokenSource();
+#pragma warning restore IDISP003
+#pragma warning disable IDISP013 // Await in using
+ _pushLoopTask = Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None);
+#pragma warning restore IDISP013
+ }
+
+ /// <summary>
+ /// Push a chunk of raw PCM audio data to the streaming session.
+ /// Can be called from any thread (including audio device callbacks).
+ /// Chunks are internally queued and serialized to the native core.
+ /// </summary>
+ /// <param name="pcmData">Raw PCM audio bytes matching the configured format.</param>
+ /// <param name="ct">Cancellation token.</param>
+ public async ValueTask AppendAsync(ReadOnlyMemory<byte> pcmData, CancellationToken ct = default)
+ {
+ if (!_started || _stopped)
+ {
+ throw new FoundryLocalException("No active streaming session. Call StartAsync first.");
+ }
+
+ // Copy the data to avoid issues if the caller reuses the buffer (e.g. NAudio reuses e.Buffer)
+ var copy = new byte[pcmData.Length];
+ pcmData.CopyTo(copy);
+
+ await _pushChannel!.Writer.WriteAsync(copy, ct).ConfigureAwait(false);
+ }
+
+ ///
+ /// Internal loop that drains the push queue and sends chunks to native core one at a time.
+ /// Implements retry for transient native errors and terminates the session on permanent failures.
+ ///
+ private async Task PushLoopAsync(CancellationToken ct)
+ {
+ const int maxRetries = 3;
+ var initialRetryDelay = TimeSpan.FromMilliseconds(50);
+
+ try
+ {
+ await foreach (var audioData in _pushChannel!.Reader.ReadAllAsync(ct).ConfigureAwait(false))
+ {
+ var request = new CoreInteropRequest
+ {
+ Params = new Dictionary<string, string> { { "SessionHandle", _sessionHandle! } }
+ };
+
+ var pushed = false;
+ for (int attempt = 0; attempt <= maxRetries && !pushed; attempt++)
+ {
+ var response = _coreInterop.PushAudioData(request, audioData);
+
+ if (response.Error == null)
+ {
+ pushed = true;
+
+ // Parse transcription result from push response and surface it
+ if (!string.IsNullOrEmpty(response.Data))
+ {
+ try
+ {
+ var transcription = LiveAudioTranscriptionResult.FromJson(response.Data);
+ if (!string.IsNullOrEmpty(transcription.Text))
+ {
+ _outputChannel?.Writer.TryWrite(transcription);
+ }
+ }
+ catch (Exception parseEx)
+ {
+ // Non-fatal: log and continue if response isn't a transcription result
+ _logger.LogDebug(parseEx, "Could not parse push response as transcription result");
+ }
+ }
+
+ continue;
+ }
+
+ // Parse structured error to determine transient vs permanent
+ var errorInfo = CoreErrorResponse.TryParse(response.Error);
+
+ if (errorInfo?.IsTransient == true && attempt < maxRetries)
+ {
+ var delay = initialRetryDelay * Math.Pow(2, attempt);
+ _logger.LogWarning(
+ "Transient push error (attempt {Attempt}/{Max}): {Code}. Retrying in {Delay}ms",
+ attempt + 1, maxRetries, errorInfo.Code, delay.TotalMilliseconds);
+ await Task.Delay(delay, ct).ConfigureAwait(false);
+ continue;
+ }
+
+ // Permanent error or retries exhausted — terminate the session
+ var fatalEx = new FoundryLocalException(
+ $"Push failed permanently (code={errorInfo?.Code ?? "UNKNOWN"}): {response.Error}",
+ _logger);
+ _logger.LogError("Terminating push loop due to permanent push failure: {Error}",
+ response.Error);
+ _outputChannel?.Writer.TryComplete(fatalEx);
+ return; // exit push loop
+ }
+ }
+ }
+ catch (OperationCanceledException)
+ {
+ // Expected on cancellation — push loop exits cleanly
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Push loop terminated with unexpected error");
+ _outputChannel?.Writer.TryComplete(
+ new FoundryLocalException("Push loop terminated unexpectedly.", ex, _logger));
+ }
+ }
+
+ /// <summary>
+ /// Get the async stream of transcription results.
+ /// Results arrive as the native ASR engine processes audio data.
+ /// </summary>
+ /// <param name="ct">Cancellation token.</param>
+ /// <returns>Async enumerable of transcription results.</returns>
+ public async IAsyncEnumerable<LiveAudioTranscriptionResult> GetTranscriptionStream(
+ [EnumeratorCancellation] CancellationToken ct = default)
+ {
+ if (_outputChannel == null)
+ {
+ throw new FoundryLocalException("No active streaming session. Call StartAsync first.");
+ }
+
+ await foreach (var item in _outputChannel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
+ {
+ yield return item;
+ }
+ }
+
+ /// <summary>
+ /// Signal end-of-audio and stop the streaming session.
+ /// Any remaining buffered audio in the push queue will be drained to native core first.
+ /// Final results are delivered through <see cref="GetTranscriptionStream"/> before it completes.
+ /// </summary>
+ /// <param name="ct">Cancellation token.</param>
+ public async Task StopAsync(CancellationToken ct = default)
+ {
+ using var disposable = await _lock.LockAsync().ConfigureAwait(false);
+
+ if (!_started || _stopped)
+ {
+ return; // already stopped or never started
+ }
+
+ _stopped = true;
+
+ // 1. Complete the push channel so the push loop drains remaining items and exits
+ _pushChannel?.Writer.TryComplete();
+
+ // 2. Wait for the push loop to finish draining
+ if (_pushLoopTask != null)
+ {
+ await _pushLoopTask.ConfigureAwait(false);
+ }
+
+ // 3. Cancel the session CTS (no-op if push loop already exited)
+ _sessionCts?.Cancel();
+
+ // 4. Tell native core to flush and finalize.
+ // This MUST happen even if ct is cancelled — otherwise native session leaks.
+ var request = new CoreInteropRequest
+ {
+ Params = new Dictionary<string, string> { { "SessionHandle", _sessionHandle! } }
+ };
+
+ ICoreInterop.Response? response = null;
+ try
+ {
+ response = await Task.Run(
+ () => _coreInterop.StopAudioStream(request), ct)
+ .ConfigureAwait(false);
+ }
+ catch (OperationCanceledException) when (ct.IsCancellationRequested)
+ {
+ // ct fired, but we MUST still stop the native session to avoid a leak.
+ _logger.LogWarning("StopAsync cancelled — performing best-effort native session stop.");
+ try
+ {
+ response = await Task.Run(
+ () => _coreInterop.StopAudioStream(request))
+ .ConfigureAwait(false);
+ }
+ catch (Exception cleanupEx)
+ {
+ _logger.LogError(cleanupEx, "Best-effort native session stop failed.");
+ }
+
+ throw; // Re-throw the cancellation after cleanup
+ }
+ finally
+ {
+ // Parse final transcription from stop response before completing the channel
+ if (response?.Data != null)
+ {
+ try
+ {
+ var finalResult = LiveAudioTranscriptionResult.FromJson(response.Data);
+ if (!string.IsNullOrEmpty(finalResult.Text))
+ {
+ _outputChannel?.Writer.TryWrite(finalResult);
+ }
+ }
+ catch (Exception parseEx)
+ {
+ _logger.LogDebug(parseEx, "Could not parse stop response as transcription result");
+ }
+ }
+
+ _sessionHandle = null;
+ _started = false;
+ _sessionCts?.Dispose();
+ _sessionCts = null;
+
+ // Complete the output channel AFTER writing final result
+ _outputChannel?.Writer.TryComplete();
+ }
+
+ if (response?.Error != null)
+ {
+ throw new FoundryLocalException(
+ $"Error stopping audio stream session: {response.Error}", _logger);
+ }
+ }
+
+ public async ValueTask DisposeAsync()
+ {
+ try
+ {
+ if (_started && !_stopped)
+ {
+ await StopAsync().ConfigureAwait(false);
+ }
+ }
+ catch (Exception ex)
+ {
+ // DisposeAsync must never throw — log and swallow
+ _logger.LogWarning(ex, "Error during DisposeAsync cleanup.");
+ }
+ finally
+ {
+ _sessionCts?.Dispose();
+ _lock.Dispose();
+ }
+ }
+}
\ No newline at end of file
diff --git a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
new file mode 100644
index 00000000..33820836
--- /dev/null
+++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
@@ -0,0 +1,74 @@
+namespace Microsoft.AI.Foundry.Local;
+
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using Microsoft.AI.Foundry.Local.Detail;
+
+public record LiveAudioTranscriptionResult
+{
+ /// <summary>
+ /// Whether this is a final or partial (interim) result.
+ /// - Nemotron models always return true (every result is final).
+ /// - Other models (e.g., Azure Embedded) may return false for interim
+ /// hypotheses that will be replaced by a subsequent final result.
+ /// </summary>
+ [JsonPropertyName("is_final")]
+ public bool IsFinal { get; init; }
+
+ /// <summary>
+ /// Newly transcribed text from this audio chunk only (incremental hypothesis).
+ /// This is NOT the full accumulated transcript — each result contains only
+ /// the text decoded from the most recent audio chunk.
+ /// </summary>
+ [JsonPropertyName("text")]
+ public string Text { get; init; } = string.Empty;
+
+ /// <summary>Start time offset of this segment in the audio stream (seconds).</summary>
+ [JsonPropertyName("start_time")]
+ public double? StartTime { get; init; }
+
+ /// <summary>End time offset of this segment in the audio stream (seconds).</summary>
+ [JsonPropertyName("end_time")]
+ public double? EndTime { get; init; }
+
+ /// <summary>Confidence score (0.0 - 1.0) if available.</summary>
+ [JsonPropertyName("confidence")]
+ public float? Confidence { get; init; }
+ /// <summary>Deserializes a native-layer JSON payload; throws FoundryLocalException when deserialization yields null.</summary>
+ internal static LiveAudioTranscriptionResult FromJson(string json)
+ {
+ return JsonSerializer.Deserialize(json,
+ JsonSerializationContext.Default.LiveAudioTranscriptionResult)
+ ?? throw new FoundryLocalException("Failed to deserialize LiveAudioTranscriptionResult");
+ }
+}
+
+internal record CoreErrorResponse
+{
+ [JsonPropertyName("code")]
+ public string Code { get; init; } = ""; // error code string reported by the native layer
+
+ [JsonPropertyName("message")]
+ public string Message { get; init; } = ""; // human-readable error description
+
+ [JsonPropertyName("isTransient")]
+ public bool IsTransient { get; init; } // native layer's transient-vs-permanent flag
+
+ /// <summary>
+ /// Attempt to parse a native error string as structured JSON.
+ /// Returns null if the error is not valid JSON or doesn't match the schema,
+ /// which should be treated as a permanent/unknown error.
+ /// </summary>
+ internal static CoreErrorResponse? TryParse(string errorString)
+ {
+ try
+ {
+ return JsonSerializer.Deserialize(errorString,
+ JsonSerializationContext.Default.CoreErrorResponse);
+ }
+ catch
+ {
+ return null; // unstructured error — treat as permanent
+ }
+ }
+}
\ No newline at end of file
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs
index b5a49657..0e2ea1dc 100644
--- a/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs
@@ -52,3 +52,5 @@ public async Task GetLastestVersion_Works()
await Assert.That(latestB).IsEqualTo(variants[1]);
}
}
+
+
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
index 55808da9..6b71921a 100644
--- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
@@ -1,452 +1,74 @@
-// --------------------------------------------------------------------------------------------------------------------
-//
-// Copyright (c) Microsoft. All rights reserved.
-//
-// --------------------------------------------------------------------------------------------------------------------
-
-namespace Microsoft.AI.Foundry.Local.Tests;
-
-using System;
-using System.Collections.Generic;
-using System.Runtime.CompilerServices;
-using System.Text.Json;
-
-using Microsoft.AI.Foundry.Local.Detail;
-using Microsoft.Extensions.Configuration;
+using Microsoft.AI.Foundry.Local;
using Microsoft.Extensions.Logging;
-using Microsoft.VisualStudio.TestPlatform.TestHost;
+var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug));
+var logger = loggerFactory.CreateLogger("AudioStreamTest");
-using Moq;
+// Point to the directory containing Core + ORT DLLs
+var corePath = @"C:\Users\ruiren\Desktop\audio-stream-test\Microsoft.AI.Foundry.Local.Core.dll";
-internal static class Utils
+var config = new Configuration
{
- internal struct TestCatalogInfo
- {
- internal readonly List<ModelInfo> TestCatalog { get; }
- internal readonly string ModelListJson { get; }
-
- internal TestCatalogInfo(bool includeCuda)
- {
-
- TestCatalog = Utils.BuildTestCatalog(includeCuda);
- ModelListJson = JsonSerializer.Serialize(TestCatalog, JsonSerializationContext.Default.ListModelInfo);
- }
- }
-
- internal static readonly TestCatalogInfo TestCatalog = new(true);
-
- [Before(Assembly)]
- public static void AssemblyInit(AssemblyHookContext _)
- {
- using var loggerFactory = LoggerFactory.Create(builder =>
- {
- builder
- .AddConsole()
- .SetMinimumLevel(LogLevel.Debug);
- });
-
- ILogger logger = loggerFactory.CreateLogger();
-
- // Read configuration from appsettings.Test.json
- logger.LogDebug("Reading configuration from appsettings.Test.json");
- var configuration = new ConfigurationBuilder()
- .SetBasePath(Directory.GetCurrentDirectory())
- .AddJsonFile("appsettings.Test.json", optional: true, reloadOnChange: false)
- .Build();
-
- var testModelCacheDirName = "test-data-shared";
- string testDataSharedPath;
- if (Path.IsPathRooted(testModelCacheDirName) ||
- testModelCacheDirName.Contains(Path.DirectorySeparatorChar) ||
- testModelCacheDirName.Contains(Path.AltDirectorySeparatorChar))
- {
- // It's a relative or complete filepath, resolve from current directory
- testDataSharedPath = Path.GetFullPath(testModelCacheDirName);
- }
- else
- {
- // It's just a directory name, combine with repo root parent
- testDataSharedPath = Path.GetFullPath(Path.Combine(GetRepoRoot(), "..", testModelCacheDirName));
- }
-
- logger.LogInformation("Using test model cache directory: {testDataSharedPath}", testDataSharedPath);
-
- if (!Directory.Exists(testDataSharedPath))
- {
- throw new DirectoryNotFoundException($"Test model cache directory does not exist: {testDataSharedPath}");
-
- }
-
- var config = new Configuration
- {
- AppName = "FoundryLocalSdkTest",
- LogLevel = Local.LogLevel.Debug,
- Web = new Configuration.WebService
- {
- Urls = "http://127.0.0.1:0"
- },
- ModelCacheDir = testDataSharedPath,
- LogsDir = Path.Combine(GetRepoRoot(), "sdk_v2", "cs", "logs")
- };
-
- // Initialize the singleton instance.
- FoundryLocalManager.CreateAsync(config, logger).GetAwaiter().GetResult();
-
- // standalone instance for testing individual components that skips the 'initialize' command
- CoreInterop = new CoreInterop(logger);
- }
-
- internal static ICoreInterop CoreInterop { get; private set; } = default!;
-
- internal static Mock<ILogger> CreateCapturingLoggerMock(List<string> sink)
- {
- var mock = new Mock<ILogger>();
- mock.Setup(x => x.Log(
- It.IsAny<LogLevel>(),
- It.IsAny<EventId>(),
- It.IsAny<It.IsAnyType>(),
- It.IsAny<Exception?>(),
- (Func)It.IsAny