diff --git a/examples/340-tauri-live-transcription-rust-ts/.env.example b/examples/340-tauri-live-transcription-rust-ts/.env.example new file mode 100644 index 0000000..99314a3 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/.env.example @@ -0,0 +1,2 @@ +# Deepgram — https://console.deepgram.com/ +DEEPGRAM_API_KEY= diff --git a/examples/340-tauri-live-transcription-rust-ts/README.md b/examples/340-tauri-live-transcription-rust-ts/README.md new file mode 100644 index 0000000..3d04960 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/README.md @@ -0,0 +1,57 @@ +# Tauri Desktop Live Transcription + +A cross-platform desktop app built with Tauri v2 that captures microphone audio, streams it to Deepgram via WebSocket for real-time transcription, and displays live captions. Tauri's Rust backend handles the Deepgram connection using the official Rust SDK, while the TypeScript frontend captures audio and renders the UI. + +## What you'll build + +A Tauri desktop application with a Rust backend that connects to Deepgram's live STT WebSocket using the Deepgram Rust SDK and a TypeScript frontend that captures microphone audio at 16 kHz, streams it to the backend via Tauri commands, and displays rolling live captions with interim and final results. 
+ +## Prerequisites + +- [Rust](https://www.rust-lang.org/tools/install) 1.70+ +- [Node.js](https://nodejs.org/) 18+ +- System WebView (WebKitGTK on Linux, WebView2 on Windows, WebKit on macOS) — see [Tauri prerequisites](https://v2.tauri.app/start/prerequisites/) +- Deepgram account — [get a free API key](https://console.deepgram.com/) + +## Environment variables + +| Variable | Where to find it | +|----------|-----------------| +| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) | + +## Install and run + +```bash +cp .env.example .env +# Add your DEEPGRAM_API_KEY to .env + +cd src +npm install +npm run tauri dev +``` + +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `model` | `nova-3` | Deepgram's latest and most accurate STT model | +| `encoding` | `linear16` | 16-bit PCM audio from the microphone | +| `sample_rate` | `16000` | 16 kHz — good balance of quality and bandwidth | +| `interim_results` | `true` | Show partial transcripts as the user speaks | +| `smart_format` | `true` | Auto-capitalisation, numbers, and punctuation | +| `utterance_end_ms` | `1500` | Detect end of speech after 1.5 s of silence | + +## How it works + +1. The Tauri app starts with a Rust backend and a web-based frontend rendered in the system WebView +2. When you click **Start**, the TypeScript frontend requests microphone access via `getUserMedia` at 16 kHz +3. A `ScriptProcessorNode` captures raw PCM audio and converts float32 samples to signed 16-bit linear PCM +4. Audio chunks are sent to the Rust backend via Tauri's `invoke("send_audio", ...)` IPC +5. The Rust backend connects to Deepgram's live STT WebSocket using the official `deepgram` Rust crate with `transcription().stream_request_with_options(...).handle()` +6. A `tokio::select!` loop multiplexes audio forwarding and transcript receiving on the same `WebsocketHandle` +7. 
Transcript events (interim and final) are emitted back to the frontend via Tauri's event system (`app.emit("transcript", ...)`) +8. The frontend renders rolling captions with final text in white and interim text in grey + +## Starter templates + +[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) diff --git a/examples/340-tauri-live-transcription-rust-ts/requirements.txt b/examples/340-tauri-live-transcription-rust-ts/requirements.txt new file mode 100644 index 0000000..64a9945 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/requirements.txt @@ -0,0 +1 @@ +deepgram-sdk==6.1.1 diff --git a/examples/340-tauri-live-transcription-rust-ts/src/index.html b/examples/340-tauri-live-transcription-rust-ts/src/index.html new file mode 100644 index 0000000..a3b3b27 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/index.html @@ -0,0 +1,115 @@ + + + + + + Deepgram Live Transcription + + + + +

Tauri + Rust + Deepgram Nova-3

+ +
+
Click Start to begin transcription...
+
+ +
+ + + disconnected +
+ + + + diff --git a/examples/340-tauri-live-transcription-rust-ts/src/package.json b/examples/340-tauri-live-transcription-rust-ts/src/package.json new file mode 100644 index 0000000..00864f7 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/package.json @@ -0,0 +1,19 @@ +{ + "name": "deepgram-tauri-live-transcription", + "private": true, + "version": "0.1.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "tauri": "tauri" + }, + "dependencies": { + "@tauri-apps/api": "2.2.0" + }, + "devDependencies": { + "@tauri-apps/cli": "2.2.0", + "typescript": "5.7.3", + "vite": "6.0.0" + } +} diff --git a/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/Cargo.toml b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/Cargo.toml new file mode 100644 index 0000000..9edc8b4 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "deepgram-tauri-live-transcription" +version = "0.1.0" +edition = "2021" + +[dependencies] +deepgram = "0.9.1" +dotenv = "0.15" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tauri = { version = "2", features = [] } +tokio = { version = "1", features = ["full"] } + +[build-dependencies] +tauri-build = { version = "2", features = [] } diff --git a/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/build.rs b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/build.rs new file mode 100644 index 0000000..d860e1e --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/build.rs @@ -0,0 +1,3 @@ +fn main() { + tauri_build::build() +} diff --git a/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/capabilities/default.json b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/capabilities/default.json new file mode 100644 index 0000000..074b8b5 --- /dev/null +++ 
b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/capabilities/default.json @@ -0,0 +1,8 @@ +{ + "identifier": "default", + "description": "Capability for the main window", + "windows": ["main"], + "permissions": [ + "core:default" + ] +} diff --git a/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/src/main.rs b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/src/main.rs new file mode 100644 index 0000000..74b4cb3 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/src/main.rs @@ -0,0 +1,195 @@ +#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] + +use deepgram::common::options::{Encoding, Model, Options}; +use deepgram::common::stream_response::StreamResponse; +use deepgram::Deepgram; +use std::env; +use std::sync::Arc; +use tauri::{AppHandle, Emitter, State}; +use tokio::sync::{mpsc, Mutex}; + +struct AppState { + // Channel for sending audio bytes to the Deepgram worker task. + // None when no transcription session is active. + audio_tx: Arc<Mutex<Option<mpsc::Sender<Vec<u8>>>>>, +} + +// Runs the Deepgram WebSocket session: forwards audio from the mpsc channel +// to Deepgram and emits transcript events back to the Tauri frontend. 
+async fn run_session( + app: AppHandle, + api_key: String, + mut audio_rx: mpsc::Receiver<Vec<u8>>, +) { + let dg = match Deepgram::new(&api_key) { + Ok(d) => d, + Err(e) => { + let _ = app.emit("dg-error", e.to_string()); + return; + } + }; + + let options = Options::builder() + .model(Model::Nova3) + .smart_format(true) + .punctuate(true) + .tag(["deepgram-examples"]) // <- THIS tags usage for Deepgram console tracking + .build(); + + let mut handle = match dg + .transcription() + .stream_request_with_options(options) + .encoding(Encoding::Linear16) + .sample_rate(16000) + .channels(1) + .interim_results(true) + .utterance_end_ms(1500) + .keep_alive() + .handle() + .await + { + Ok(h) => h, + Err(e) => { + let _ = app.emit("dg-error", e.to_string()); + return; + } + }; + + let _ = app.emit("dg-status", "connected"); + + // Single loop that multiplexes audio sending and response receiving. + // tokio::select! lets us await both the audio channel and the Deepgram + // WebSocket concurrently without needing to split the handle. + loop { + tokio::select! { + biased; + + audio = audio_rx.recv() => { + match audio { + Some(data) => { + if let Err(e) = handle.send_data(data).await { + let _ = app.emit("dg-error", e.to_string()); + break; + } + } + // Frontend closed the channel — finalize + None => { + let _ = handle.close_stream().await; + break; + } + } + } + + resp = handle.receive() => { + match resp { + Some(Ok(StreamResponse::TranscriptResponse { + channel, + is_final, + speech_final, + .. + })) => { + // channel.alternatives[0].transcript holds the text + if let Some(alt) = channel.alternatives.first() { + if !alt.transcript.is_empty() { + let _ = app.emit( + "transcript", + serde_json::json!({ + "text": alt.transcript, + "is_final": is_final, + "speech_final": speech_final, + "confidence": alt.confidence, + }), + ); + } + } + } + Some(Ok(StreamResponse::TerminalResponse { .. 
})) => { + let _ = app.emit("dg-status", "closed"); + break; + } + Some(Ok(StreamResponse::UtteranceEndResponse { .. })) => { + let _ = app.emit("utterance-end", ""); + } + Some(Ok(_)) => {} + Some(Err(e)) => { + let _ = app.emit("dg-error", e.to_string()); + break; + } + None => { + let _ = app.emit("dg-status", "closed"); + break; + } + } + } + } + } + + let _ = app.emit("dg-status", "disconnected"); +} + +#[tauri::command] +async fn start_transcription( + app: AppHandle, + state: State<'_, AppState>, +) -> Result<(), String> { + let api_key = env::var("DEEPGRAM_API_KEY") + .map_err(|_| "DEEPGRAM_API_KEY not set".to_string())?; + + // Stop any existing session first + { + let mut tx_guard = state.audio_tx.lock().await; + tx_guard.take(); + } + + // Buffered channel — frontend sends audio chunks here; the worker + // forwards them to Deepgram's WebSocket. + let (audio_tx, audio_rx) = mpsc::channel::<Vec<u8>>(512); + + { + let mut tx_guard = state.audio_tx.lock().await; + *tx_guard = Some(audio_tx); + } + + tokio::spawn(run_session(app, api_key, audio_rx)); + + Ok(()) +} + +#[tauri::command] +async fn send_audio( + state: State<'_, AppState>, + audio: Vec<u8>, +) -> Result<(), String> { + let tx_guard = state.audio_tx.lock().await; + if let Some(tx) = tx_guard.as_ref() { + tx.send(audio).await.map_err(|e| e.to_string())?; + } + Ok(()) +} + +#[tauri::command] +async fn stop_transcription( + state: State<'_, AppState>, +) -> Result<(), String> { + let mut tx_guard = state.audio_tx.lock().await; + // Dropping the sender closes the channel, which signals the worker + // to finalize and disconnect from Deepgram. 
+ tx_guard.take(); + Ok(()) +} + +fn main() { + dotenv::dotenv().ok(); + + tauri::Builder::default() + .manage(AppState { + audio_tx: Arc::new(Mutex::new(None)), + }) + .invoke_handler(tauri::generate_handler![ + start_transcription, + send_audio, + stop_transcription, + ]) + .run(tauri::generate_context!()) + .expect("error while running tauri application"); +} diff --git a/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/tauri.conf.json b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/tauri.conf.json new file mode 100644 index 0000000..0fc0d76 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/src-tauri/tauri.conf.json @@ -0,0 +1,26 @@ +{ + "$schema": "https://raw.githubusercontent.com/tauri-apps/tauri/dev/crates/tauri-cli/schema.json", + "productName": "Deepgram Live Transcription", + "version": "0.1.0", + "identifier": "com.deepgram.examples.live-transcription", + "build": { + "frontendDist": "../dist", + "devUrl": "http://localhost:1420", + "beforeDevCommand": "npm run dev", + "beforeBuildCommand": "npm run build" + }, + "app": { + "windows": [ + { + "title": "Deepgram Live Transcription", + "width": 700, + "height": 500, + "resizable": true, + "decorations": true + } + ], + "security": { + "csp": "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'" + } + } +} diff --git a/examples/340-tauri-live-transcription-rust-ts/src/src/main.ts b/examples/340-tauri-live-transcription-rust-ts/src/src/main.ts new file mode 100644 index 0000000..ba55be8 --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/src/main.ts @@ -0,0 +1,127 @@ +import { invoke } from "@tauri-apps/api/core"; +import { listen } from "@tauri-apps/api/event"; + +const transcriptEl = document.getElementById("transcript")!; +const btnStart = document.getElementById("btn-start") as HTMLButtonElement; +const btnStop = document.getElementById("btn-stop") as HTMLButtonElement; +const statusEl = document.getElementById("status")!; 
+ +const MAX_LINES = 6; +const finalLines: string[] = []; +let currentInterim = ""; + +let mediaStream: MediaStream | null = null; +let audioContext: AudioContext | null = null; +let processorNode: ScriptProcessorNode | null = null; + +function renderTranscript() { + const visible = finalLines.slice(-MAX_LINES); + let html = visible.map((l) => `${l}`).join("
"); + if (currentInterim) { + html += `
${currentInterim}`; + } + transcriptEl.innerHTML = html || "Listening..."; +} + +// Capture microphone at 16 kHz, convert float32 to linear16 PCM, +// and forward each chunk to the Rust backend via Tauri command. +async function startAudioCapture() { + mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true }); + + // 16 kHz matches the Deepgram encoding config exactly + audioContext = new AudioContext({ sampleRate: 16000 }); + const source = audioContext.createMediaStreamSource(mediaStream); + + // ScriptProcessorNode is deprecated but universally supported in + // WebView contexts. AudioWorklet is the modern alternative but adds + // file complexity not warranted for this example. + processorNode = audioContext.createScriptProcessor(4096, 1, 1); + + processorNode.onaudioprocess = (event: AudioProcessingEvent) => { + const float32 = event.inputBuffer.getChannelData(0); + + // Convert float32 [-1, 1] → signed 16-bit PCM + const int16 = new Int16Array(float32.length); + for (let i = 0; i < float32.length; i++) { + const s = Math.max(-1, Math.min(1, float32[i])); + int16[i] = s < 0 ? 
s * 0x8000 : s * 0x7fff; + } + + invoke("send_audio", { audio: Array.from(new Uint8Array(int16.buffer)) }); + }; + + source.connect(processorNode); + processorNode.connect(audioContext.destination); +} + +function stopAudioCapture() { + if (processorNode) { + processorNode.disconnect(); + processorNode = null; + } + if (audioContext) { + audioContext.close(); + audioContext = null; + } + if (mediaStream) { + mediaStream.getTracks().forEach((t) => t.stop()); + mediaStream = null; + } +} + +btnStart.addEventListener("click", async () => { + btnStart.disabled = true; + btnStop.disabled = false; + btnStart.classList.add("active"); + btnStop.classList.remove("active"); + + finalLines.length = 0; + currentInterim = ""; + transcriptEl.textContent = "Connecting..."; + + await invoke("start_transcription"); + await startAudioCapture(); +}); + +btnStop.addEventListener("click", async () => { + btnStop.disabled = true; + btnStart.disabled = false; + btnStop.classList.remove("active"); + btnStart.classList.remove("active"); + + stopAudioCapture(); + await invoke("stop_transcription"); + statusEl.textContent = "disconnected"; + statusEl.className = "status"; +}); + +listen<{ text: string; is_final: boolean; speech_final: boolean; confidence: number }>( + "transcript", + (event) => { + if (event.payload.is_final) { + finalLines.push(event.payload.text); + currentInterim = ""; + } else { + currentInterim = event.payload.text; + } + renderTranscript(); + } +); + +listen<string>("dg-status", (event) => { + statusEl.textContent = event.payload; + statusEl.className = `status ${event.payload}`; +}); + +listen<string>("dg-error", (event) => { + statusEl.textContent = `error: ${event.payload}`; + statusEl.className = "status error"; +}); + +listen("utterance-end", () => { + if (currentInterim) { + finalLines.push(currentInterim); + currentInterim = ""; + renderTranscript(); + } +}); diff --git a/examples/340-tauri-live-transcription-rust-ts/src/tsconfig.json 
b/examples/340-tauri-live-transcription-rust-ts/src/tsconfig.json new file mode 100644 index 0000000..20ba36b --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2021", + "module": "ESNext", + "moduleResolution": "bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "lib": ["ES2021", "DOM", "DOM.Iterable"], + "outDir": "dist" + }, + "include": ["src/**/*.ts"] +} diff --git a/examples/340-tauri-live-transcription-rust-ts/src/vite.config.ts b/examples/340-tauri-live-transcription-rust-ts/src/vite.config.ts new file mode 100644 index 0000000..e12a56d --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/src/vite.config.ts @@ -0,0 +1,15 @@ +import { defineConfig } from "vite"; + +export default defineConfig({ + clearScreen: false, + server: { + port: 1420, + strictPort: true, + }, + envPrefix: ["VITE_", "TAURI_"], + build: { + target: "es2021", + minify: !process.env.TAURI_DEBUG ? "esbuild" : false, + sourcemap: !!process.env.TAURI_DEBUG, + }, +}); diff --git a/examples/340-tauri-live-transcription-rust-ts/tests/test_example.py b/examples/340-tauri-live-transcription-rust-ts/tests/test_example.py new file mode 100644 index 0000000..1fee5ac --- /dev/null +++ b/examples/340-tauri-live-transcription-rust-ts/tests/test_example.py @@ -0,0 +1,105 @@ +"""Test Deepgram live STT integration used by the Tauri example. + +The Tauri desktop app requires a full build toolchain (Rust, system WebView). +This test verifies the Deepgram WebSocket STT call that the Rust backend wraps +— same model, same parameters — using the Python SDK as a test harness. 
+""" + +import os +import sys +from pathlib import Path + +# ── Credential check ──────────────────────────────────────────────────────── +# Exit code convention across all examples in this repo: +# 0 = all tests passed +# 1 = real test failure (code bug, assertion error, unexpected API response) +# 2 = missing credentials (expected in CI until secrets are configured) +env_example = Path(__file__).parent.parent / ".env.example" +required = [ + line.split("=")[0].strip() + for line in env_example.read_text().splitlines() + if line and not line.startswith("#") and "=" in line and line[0].isupper() +] +missing = [k for k in required if not os.environ.get(k)] +if missing: + print(f"MISSING_CREDENTIALS: {','.join(missing)}", file=sys.stderr) + sys.exit(2) +# ──────────────────────────────────────────────────────────────────────────── + +from deepgram import DeepgramClient + + +def test_file_structure(): + """Verify all required project files exist.""" + root = Path(__file__).parent.parent + required_files = [ + ".env.example", + "README.md", + "src/src-tauri/src/main.rs", + "src/src-tauri/Cargo.toml", + "src/src-tauri/tauri.conf.json", + "src/src/main.ts", + "src/index.html", + "src/package.json", + ] + for f in required_files: + full = root / f + assert full.exists(), f"Missing required file: {f}" + print("File structure check passed") + + +def test_rust_source_uses_deepgram_sdk(): + """Verify the Rust source uses the Deepgram SDK correctly.""" + root = Path(__file__).parent.parent + main_rs = (root / "src" / "src-tauri" / "src" / "main.rs").read_text() + + assert "deepgram::Deepgram" in main_rs or "use deepgram" in main_rs, \ + "main.rs does not import the Deepgram SDK" + assert "deepgram-examples" in main_rs, \ + "main.rs missing required tag 'deepgram-examples'" + assert "Model::Nova3" in main_rs, \ + "main.rs should use Nova3 model" + assert "DEEPGRAM_API_KEY" in main_rs, \ + "main.rs should read DEEPGRAM_API_KEY from environment" + assert "send_audio" in main_rs, 
\ + "main.rs should expose send_audio Tauri command" + assert "start_transcription" in main_rs, \ + "main.rs should expose start_transcription Tauri command" + + cargo_toml = (root / "src" / "src-tauri" / "Cargo.toml").read_text() + assert 'deepgram = "0.9.1"' in cargo_toml, \ + "Cargo.toml should pin deepgram = 0.9.1" + + print("Rust source validation passed") + + +def test_deepgram_stt(): + """Verify the Deepgram API key works and nova-3 returns a transcript. + + This exercises the same STT endpoint the Rust backend calls: + model=nova-3 with smart_format=true and tag=deepgram-examples. + """ + client = DeepgramClient() + # tag="deepgram-examples" is REQUIRED on every Deepgram API call + response = client.listen.v1.media.transcribe_url( + url="https://dpgr.am/spacewalk.wav", + model="nova-3", + smart_format=True, + tag="deepgram-examples", + ) + transcript = response.results.channels[0].alternatives[0].transcript + assert len(transcript) > 10, "Transcript too short" + + duration = response.results.channels[0].alternatives[0].words[-1].end if response.results.channels[0].alternatives[0].words else 0 + chars_per_sec = len(transcript) / max(duration, 1) + assert 1 < chars_per_sec < 100, f"Transcript length not proportional to duration: {len(transcript)} chars / {duration:.1f}s" + + print("Deepgram STT integration working") + print(f" Transcript preview: '{transcript[:80]}...'") + + +if __name__ == "__main__": + test_file_structure() + test_rust_source_uses_deepgram_sdk() + test_deepgram_stt() + print("\nAll tests passed")