Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Managed Foundry Local SDK, referenced directly from this repo's source. -->
<ItemGroup>
<ProjectReference Include="..\..\..\sdk_v2\cs\src\Microsoft.AI.Foundry.Local.csproj" />
</ItemGroup>

<!-- Console logging for diagnostics; NAudio for microphone capture (WaveInEvent). -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
<PackageReference Include="NAudio" Version="2.2.1" />
</ItemGroup>

<!-- win-x64 RID: the native Core/ORT DLLs below and NAudio capture are Windows-only. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- Copy Core + ORT native DLLs to output directory.
These must be placed in the project root before building.
See README.md for instructions.
The Exists() conditions keep the build green when the DLLs have not been
copied in yet; the app then reports the missing Core DLL at startup. -->
<ItemGroup>
<None Include="Microsoft.AI.Foundry.Local.Core.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('Microsoft.AI.Foundry.Local.Core.dll')" />
<None Include="onnxruntime-genai.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime-genai.dll')" />
<None Include="onnxruntime.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime.dll')" />
<None Include="onnxruntime_providers_shared.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime_providers_shared.dll')" />
</ItemGroup>

</Project>
169 changes: 169 additions & 0 deletions samples/cs/LiveAudioTranscription/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Live Audio Transcription — Foundry Local SDK Example
//
// Demonstrates real-time microphone-to-text using:
//   SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
//
// Prerequisites:
//   1. Nemotron ASR model downloaded to a local cache folder
//   2. Microsoft.AI.Foundry.Local.Core.dll (built from neutron-server with GenAI 0.13.0+)
//   3. onnxruntime-genai.dll + onnxruntime.dll + onnxruntime_providers_shared.dll (native GenAI)
//
// Usage:
//   dotnet run -- [model-cache-dir]
//   dotnet run -- C:\path\to\models

using Microsoft.AI.Foundry.Local;
using Microsoft.Extensions.Logging;
using NAudio.Wave;

// Model cache directory: first CLI arg, or %LocalAppData%\FoundryLocal\models by default.
var modelCacheDir = args.Length > 0
    ? args[0]
    : Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
                   "FoundryLocal", "models");

var coreDllPath = Path.Combine(AppContext.BaseDirectory, "Microsoft.AI.Foundry.Local.Core.dll");

// `using` so the console logging provider is disposed — and its buffered output
// flushed — when the program exits.
using var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information));
var logger = loggerFactory.CreateLogger("LiveAudioTranscription");

Console.WriteLine("===========================================================");
Console.WriteLine(" Foundry Local -- Live Audio Transcription Demo");
Console.WriteLine("===========================================================");
Console.WriteLine();
Console.WriteLine($" Model cache: {modelCacheDir}");
Console.WriteLine($" Core DLL: {coreDllPath} (exists: {File.Exists(coreDllPath)})");
Console.WriteLine();

try
{
    // === Step 1: Initialize Foundry Local SDK ===
    Console.WriteLine("[1/5] Initializing Foundry Local SDK...");
    var config = new Configuration
    {
        AppName = "LiveAudioTranscription",
        LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
        ModelCacheDir = modelCacheDir,
        AdditionalSettings = new Dictionary<string, string>
        {
            // Tells the SDK where to load the NativeAOT Core DLL from.
            { "FoundryLocalCorePath", coreDllPath }
        }
    };

    await FoundryLocalManager.CreateAsync(config, logger);
    Console.WriteLine(" SDK initialized.");

    // === Step 2: Find and load the nemotron ASR model ===
    Console.WriteLine("[2/5] Loading nemotron model...");
    var catalog = await FoundryLocalManager.Instance.GetCatalogAsync();
    var model = await catalog.GetModelAsync("nemotron");

    if (model == null)
    {
        Console.WriteLine("ERROR: 'nemotron' not found in catalog.");
        Console.WriteLine($" Ensure the model is downloaded to: {modelCacheDir}");
        Console.WriteLine(" The folder should contain genai_config.json, encoder.onnx, decoder.onnx, etc.");
        Environment.ExitCode = 1; // non-zero so scripts can detect the failure
        return;
    }

    Console.WriteLine($" Found model: {model.Alias}");
    await model.LoadAsync();
    Console.WriteLine(" Model loaded.");

    // === Step 3: Create live transcription session ===
    Console.WriteLine("[3/5] Creating live transcription session...");
    var audioClient = await model.GetAudioClientAsync();
    var session = audioClient.CreateLiveTranscriptionSession();
    // Must match the NAudio capture format configured below (16 kHz / mono).
    session.Settings.SampleRate = 16000;
    session.Settings.Channels = 1;
    session.Settings.Language = "en";

    await session.StartAsync();
    Console.WriteLine(" Session started (SDK -> Core -> GenAI pipeline active).");

    // === Step 4: Set up microphone + transcription reader ===
    Console.WriteLine("[4/5] Setting up microphone...");

    // Background task reads transcription results as they arrive.
    // Partial hypotheses are written inline in cyan; final results on their own line.
    var readTask = Task.Run(async () =>
    {
        try
        {
            await foreach (var result in session.GetTranscriptionStream())
            {
                if (result.IsFinal)
                {
                    Console.WriteLine();
                    Console.WriteLine($" [FINAL] {result.Text}");
                    Console.Out.Flush();
                }
                else if (!string.IsNullOrEmpty(result.Text))
                {
                    Console.ForegroundColor = ConsoleColor.Cyan;
                    Console.Write(result.Text);
                    Console.ResetColor();
                    Console.Out.Flush();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Expected when the session is stopped while the stream is still open.
        }
    });

    // Microphone capture via NAudio: 16-bit mono PCM at 16 kHz, delivered in
    // ~100 ms chunks, matching the session settings above.
    using var waveIn = new WaveInEvent
    {
        WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1),
        BufferMilliseconds = 100
    };

    var totalChunks = 0;
    var totalBytes = 0L;

    waveIn.DataAvailable += async (sender, e) =>
    {
        if (e.BytesRecorded <= 0)
            return;

        // NAudio reuses e.Buffer for the next capture chunk as soon as this
        // handler returns, so the bytes must be copied before being handed to
        // the asynchronous append — otherwise the session could read audio that
        // has already been overwritten.
        var chunk = new byte[e.BytesRecorded];
        Buffer.BlockCopy(e.Buffer, 0, chunk, 0, e.BytesRecorded);

        // This handler runs on NAudio's capture thread while the totals are read
        // on the main thread after recording stops — update them atomically.
        Interlocked.Increment(ref totalChunks);
        Interlocked.Add(ref totalBytes, e.BytesRecorded);

        try
        {
            await session.AppendAsync(chunk);
        }
        catch (Exception ex)
        {
            // async-void event handler: an unobserved exception would crash the
            // process, so log it and keep capturing.
            logger.LogError(ex, "Failed to append audio chunk to transcription session.");
        }
    };

    // === Step 5: Record ===
    Console.WriteLine();
    Console.WriteLine("===========================================================");
    Console.WriteLine(" LIVE TRANSCRIPTION ACTIVE");
    Console.WriteLine(" Speak into your microphone.");
    Console.WriteLine(" Transcription appears in real-time (cyan text).");
    Console.WriteLine(" Press ENTER to stop recording.");
    Console.WriteLine("===========================================================");
    Console.WriteLine();

    waveIn.StartRecording();
    Console.ReadLine();
    waveIn.StopRecording();

    // 16,000 samples/s * 2 bytes/sample = bytes per second of 16-bit mono audio.
    var capturedBytes = Interlocked.Read(ref totalBytes);
    var totalSeconds = capturedBytes / (16000.0 * 2);
    Console.WriteLine($"\n Recording: {totalSeconds:F1}s | {totalChunks} chunks | {capturedBytes / 1024} KB");

    // Stop session (flushes remaining audio through the pipeline), then wait for
    // the reader task to drain the final results.
    Console.WriteLine("\n[5/5] Stopping session...");
    await session.StopAsync();
    await readTask;

    // Unload model
    await model.UnloadAsync();

    Console.WriteLine();
    Console.WriteLine("===========================================================");
    Console.WriteLine(" Demo complete!");
    Console.WriteLine(" Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text");
    Console.WriteLine("===========================================================");
}
catch (Exception ex)
{
    Console.WriteLine($"\nERROR: {ex.Message}");
    if (ex.InnerException != null)
        Console.WriteLine($"Inner: {ex.InnerException.Message}");
    Console.WriteLine($"\n{ex.StackTrace}");
    Environment.ExitCode = 1; // non-zero exit so callers can detect failure
}
143 changes: 143 additions & 0 deletions samples/cs/LiveAudioTranscription/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Live Audio Transcription Demo

Real-time microphone-to-text using the Foundry Local SDK, Foundry Local Core, and onnxruntime-genai.

## Architecture

```
Microphone (NAudio, 16kHz/16-bit/mono)
|
v
Foundry Local SDK (C#)
| AppendAsync(pcmBytes)
v
Foundry Local Core (NativeAOT DLL)
| AppendAudioChunk -> CommitTranscription
v
onnxruntime-genai (StreamingProcessor + Generator)
| RNNT encoder + decoder
v
Live transcription text
```

## Prerequisites

1. **Windows x64** with a microphone
2. **.NET 9.0 SDK** installed
3. **Nemotron ASR model** downloaded locally
4. **Native DLLs** (4 files — see Setup below)

## Setup (Step by Step)

### Step 1: Get the native DLLs

You need 4 DLLs placed in this project folder:

| DLL | Source |
|-----|--------|
| `Microsoft.AI.Foundry.Local.Core.dll` | Built from neutron-server (`dotnet publish` with NativeAOT) |
| `onnxruntime-genai.dll` | Built from onnxruntime-genai (the StreamingProcessor development branch) |
| `onnxruntime.dll` | Comes with the Core publish output |
| `onnxruntime_providers_shared.dll` | Comes with the Core publish output |

**Option A: From CI artifacts**
- Download the Core DLL from the neutron-server CI pipeline artifacts
- Download the GenAI native DLLs from the onnxruntime-genai pipeline artifacts

**Option B: From a teammate**
- Ask for the 4 DLLs from someone who has already built them

Copy all 4 DLLs to this folder (`samples/cs/LiveAudioTranscription/`).

### Step 2: Get the Nemotron model

The model should be in a folder with this structure:
```
models/
nemotron/
genai_config.json
encoder.onnx
decoder.onnx
joint.onnx
tokenizer.json
vocab.txt
```

### Step 3: Build

```powershell
cd samples/cs/LiveAudioTranscription
dotnet build -c Debug
```

### Step 4: Copy native DLLs to output (if not auto-copied)

```powershell
Copy-Item onnxruntime-genai.dll bin\Debug\net9.0\win-x64\ -Force
Copy-Item onnxruntime.dll bin\Debug\net9.0\win-x64\ -Force
Copy-Item onnxruntime_providers_shared.dll bin\Debug\net9.0\win-x64\ -Force
Copy-Item Microsoft.AI.Foundry.Local.Core.dll bin\Debug\net9.0\win-x64\ -Force
```

### Step 5: Run

```powershell
# Default model cache location
dotnet run -c Debug --no-build

# Or specify model cache directory
dotnet run -c Debug --no-build -- C:\path\to\models
```

### Step 6: Speak!

- The app will show `LIVE TRANSCRIPTION ACTIVE`
- Speak into your microphone
- Text appears in **cyan** as you speak
- Press **ENTER** to stop

## Expected Output

```
===========================================================
Foundry Local -- Live Audio Transcription Demo
===========================================================

[1/5] Initializing Foundry Local SDK...
SDK initialized.
[2/5] Loading nemotron model...
Found model: nemotron
Model loaded.
[3/5] Creating live transcription session...
Session started (SDK -> Core -> GenAI pipeline active).
[4/5] Setting up microphone...

===========================================================
LIVE TRANSCRIPTION ACTIVE
Speak into your microphone.
Transcription appears in real-time (cyan text).
Press ENTER to stop recording.
===========================================================

Hello this is a demo of live audio transcription running entirely on device
[FINAL] Hello this is a demo of live audio transcription running entirely on device

Recording: 15.2s | 152 chunks | 475 KB

[5/5] Stopping session...

===========================================================
Demo complete!
Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text
===========================================================
```

## Troubleshooting

| Error | Fix |
|-------|-----|
| `Core DLL not found` | Copy `Microsoft.AI.Foundry.Local.Core.dll` to project folder |
| `nemotron not found in catalog` | Check `ModelCacheDir` points to folder containing `nemotron/` with `genai_config.json` |
| `OgaStreamingProcessor not found` | The `onnxruntime-genai.dll` is outdated — rebuild from the StreamingProcessor development branch or download it from CI |
| `No microphone` | Ensure a mic is connected and set as default recording device |
| `num_mels unknown` | Fix `genai_config.json` — ASR params must be at model level, not nested under `speech` |
1 change: 1 addition & 0 deletions sdk_v2/cs/src/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
using System.Runtime.CompilerServices;

// Expose internal types to the unit-test assembly.
[assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")]
// Expose internal types to the audio-streaming test harness.
[assembly: InternalsVisibleTo("AudioStreamTest")]
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop
Comment on lines +10 to 11
Loading
Loading