Merged
2 changes: 2 additions & 0 deletions examples/370-swift-ios-live-transcription/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Deepgram — https://console.deepgram.com/
DEEPGRAM_API_KEY=
18 changes: 18 additions & 0 deletions examples/370-swift-ios-live-transcription/Package.swift
@@ -0,0 +1,18 @@
// swift-tools-version: 5.9
import PackageDescription

// This Package.swift exists so the example can be opened as a Swift package.
// For a full Xcode project, create a new iOS App target and drag the files
// from src/ into it. The app has no external dependencies — only Apple
// frameworks (AVFoundation, SwiftUI, Foundation).

let package = Package(
name: "DeepgramLiveTranscription",
platforms: [.iOS(.v17)],
targets: [
.executableTarget(
name: "DeepgramLiveTranscription",
path: "src"
),
]
)
71 changes: 71 additions & 0 deletions examples/370-swift-ios-live-transcription/README.md
@@ -0,0 +1,71 @@
# Swift iOS Live Transcription

A native SwiftUI iOS app that streams microphone audio to Deepgram's live speech-to-text API over WebSocket using AVAudioEngine. Displays real-time transcription with interim and final results — no third-party dependencies required.

## What you'll build

A SwiftUI screen with a microphone button that captures audio from the device microphone using AVAudioEngine, streams 16 kHz mono PCM audio to Deepgram via URLSessionWebSocketTask, and renders a live transcript. Interim results appear in grey as you speak; final results replace them in the primary text color.

## Prerequisites

- Xcode 15+ with iOS 17 SDK
- Physical iOS device (microphone access requires a real device; Simulator has limited mic support)
- Deepgram account — [get a free API key](https://console.deepgram.com/)

## Environment variables

| Variable | Where to find it |
|----------|-----------------|
| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) → Settings → API Keys |

## Install and run

### Option 1: Open as Swift Package

```bash
cd examples/370-swift-ios-live-transcription
open Package.swift
```

Xcode will open the package. Select an iOS device target and run.

### Option 2: Add to an existing Xcode project

1. Create a new iOS App project in Xcode (SwiftUI lifecycle)
2. Drag all files from `src/` into the project navigator
3. Merge `Info.plist` entries (microphone permission + background audio)
4. Set `DEEPGRAM_API_KEY` in your scheme's environment variables

### Setting the API key

In Xcode: **Product → Scheme → Edit Scheme → Run → Arguments → Environment Variables** — add `DEEPGRAM_API_KEY` with your key.
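The key can then be read at runtime from the process environment. A minimal sketch; the helper name `deepgramAPIKey` is illustrative and not part of the example's source:

```swift
import Foundation

// Reads the key from the scheme's environment variables, matching the
// setup step above. Returns nil when the variable is unset or empty so
// callers can fail fast with a clear message instead of sending a blank key.
func deepgramAPIKey() -> String? {
    guard let key = ProcessInfo.processInfo.environment["DEEPGRAM_API_KEY"],
          !key.isEmpty else { return nil }
    return key
}
```

Returning an optional (rather than crashing on launch) lets the UI show a helpful "missing API key" state during development.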

## Key parameters

| Parameter | Value | Description |
|-----------|-------|-------------|
| `model` | `nova-3` | Deepgram's flagship STT model (2025) — best accuracy and speed |
| `encoding` | `linear16` | Raw 16-bit signed integer PCM — what AVAudioEngine produces |
| `sample_rate` | `16000` | 16 kHz — sufficient for speech; keeps bandwidth low on mobile |
| `interim_results` | `true` | Get partial transcripts while the user is still speaking |
| `utterance_end_ms` | `1000` | Silence threshold (ms) before Deepgram considers an utterance complete |
| `tag` | `deepgram-examples` | Tags traffic in the Deepgram console for identification |
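Assembled as query parameters, these values produce the WebSocket URL the client connects to. A sketch with `URLComponents`, mirroring what `DeepgramClient.connect()` does internally:

```swift
import Foundation

// Builds the wss://api.deepgram.com/v1/listen URL with the parameters
// from the table above.
func listenURL(model: String = "nova-3",
               encoding: String = "linear16",
               sampleRate: Int = 16000) -> URL {
    var components = URLComponents(string: "wss://api.deepgram.com/v1/listen")!
    components.queryItems = [
        URLQueryItem(name: "model", value: model),
        URLQueryItem(name: "encoding", value: encoding),
        URLQueryItem(name: "sample_rate", value: String(sampleRate)),
        URLQueryItem(name: "interim_results", value: "true"),
        URLQueryItem(name: "utterance_end_ms", value: "1000"),
        URLQueryItem(name: "tag", value: "deepgram-examples"),
    ]
    return components.url!
}
```

`URLComponents` handles percent-encoding, so parameter values never need to be escaped by hand.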

## How it works

1. User taps the microphone button; `AudioCaptureManager` requests microphone permission and starts `AVAudioEngine`
2. The engine's input node tap delivers audio buffers at the hardware sample rate; `AVAudioConverter` resamples to 16 kHz mono Int16 PCM
3. `DeepgramClient` opens a WebSocket to `wss://api.deepgram.com/v1/listen` with model, encoding, and sample rate as query parameters; the API key is sent as an `Authorization: Token <key>` header
4. PCM buffers are sent as binary WebSocket frames (~100 ms chunks); Deepgram returns JSON `Results` messages with `is_final` and `speech_final` flags
5. `TranscriptionViewModel` accumulates final transcripts and shows interim partials; SwiftUI updates the view reactively
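The `Results` messages from step 4 decode with plain `Codable`. A trimmed sketch of the shape the app parses (the full model lives in `DeepgramClient.swift`; the sample payload below is illustrative, not a captured response):

```swift
import Foundation

// Trimmed decoding model for a Deepgram "Results" message.
struct ResultsMessage: Decodable {
    let type: String
    let isFinal: Bool
    let channel: Channel

    enum CodingKeys: String, CodingKey {
        case type, channel
        case isFinal = "is_final"
    }

    struct Channel: Decodable { let alternatives: [Alternative] }
    struct Alternative: Decodable { let transcript: String }
}

// Illustrative payload with the fields the app reads.
let sample = #"""
{"type":"Results","is_final":true,
 "channel":{"alternatives":[{"transcript":"hello world"}]}}
"""#

let msg = try! JSONDecoder().decode(ResultsMessage.self,
                                    from: Data(sample.utf8))
```

Because unknown JSON fields are ignored by `JSONDecoder`, the model only needs to declare the fields the UI actually uses.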

## Production considerations

- **Don't ship API keys in the binary.** Use a backend token endpoint that issues short-lived Deepgram API keys or proxies the WebSocket connection
- **Handle network transitions** — mobile apps switch between WiFi and cellular; implement WebSocket reconnection with exponential backoff
- **Battery life** — stop the audio engine and close the WebSocket when the app goes to background (`scenePhase` observer)
- **Background audio** — if you need transcription while backgrounded, enable the Audio background mode in Xcode capabilities and keep the audio session active
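For the reconnection point above, a minimal backoff schedule might look like this. The base delay and cap are illustrative choices, not values from the example:

```swift
import Foundation

// Exponential backoff with a cap: 0.5 s, 1 s, 2 s, 4 s, ... up to 30 s.
func reconnectDelay(attempt: Int, base: Double = 0.5, cap: Double = 30) -> Double {
    min(cap, base * pow(2, Double(attempt)))
}
```

In production you would also add random jitter so many clients reconnecting after the same network event do not retry in lockstep.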

## Starter templates

[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories)
@@ -0,0 +1,105 @@
import AVFoundation

// Captures microphone audio using AVAudioEngine and delivers raw PCM buffers.
// AVAudioEngine is preferred over AVAudioRecorder because it provides a
// streaming tap (real-time buffer callback) rather than writing to a file.

protocol AudioCaptureDelegate: AnyObject {
func audioCaptureDidReceive(pcmData: Data)
}

final class AudioCaptureManager {
weak var delegate: AudioCaptureDelegate?

private let engine = AVAudioEngine()
// 16 kHz mono LINEAR16 — matches the DeepgramClient's default encoding
private let desiredSampleRate: Double = 16000.0
private let desiredChannels: UInt32 = 1

func startCapture() throws {
let session = AVAudioSession.sharedInstance()
// .measurement avoids system audio processing (echo cancellation, AGC)
// which would distort the audio for transcription
try session.setCategory(.record, mode: .measurement, options: .duckOthers)
try session.setActive(true, options: .notifyOthersOnDeactivation)

let inputNode = engine.inputNode
let inputFormat = inputNode.outputFormat(forBus: 0)

// Convert hardware format → 16 kHz mono Int16 for Deepgram
guard let targetFormat = AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: desiredSampleRate,
channels: AVAudioChannelCount(desiredChannels),
interleaved: true
) else {
throw AudioCaptureError.formatCreationFailed
}

guard let converter = AVAudioConverter(from: inputFormat, to: targetFormat) else {
throw AudioCaptureError.converterCreationFailed
}

// Buffer size: 100 ms of audio at input sample rate
let bufferSize = AVAudioFrameCount(inputFormat.sampleRate * 0.1)

inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: inputFormat) { [weak self] buffer, _ in
self?.convert(buffer: buffer, converter: converter, targetFormat: targetFormat)
}

try engine.start()
}

func stopCapture() {
engine.inputNode.removeTap(onBus: 0)
engine.stop()

try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
}

private func convert(buffer: AVAudioPCMBuffer, converter: AVAudioConverter, targetFormat: AVAudioFormat) {
let frameCapacity = AVAudioFrameCount(
Double(buffer.frameLength) * (targetFormat.sampleRate / buffer.format.sampleRate)
)
guard frameCapacity > 0,
let outputBuffer = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: frameCapacity)
else { return }

var error: NSError?
var hasData = true
converter.convert(to: outputBuffer, error: &error) { _, outStatus in
if hasData {
hasData = false
outStatus.pointee = .haveData
return buffer
}
outStatus.pointee = .noDataNow
return nil
}

if let error = error {
print("Audio conversion error: \(error)")
return
}

// Extract raw Int16 bytes from the converted buffer
guard let channelData = outputBuffer.int16ChannelData else { return }
let byteCount = Int(outputBuffer.frameLength) * MemoryLayout<Int16>.size
let data = Data(bytes: channelData[0], count: byteCount)
delegate?.audioCaptureDidReceive(pcmData: data)
}
}

enum AudioCaptureError: LocalizedError {
case formatCreationFailed
case converterCreationFailed

var errorDescription: String? {
switch self {
case .formatCreationFailed:
return "Failed to create target audio format (16 kHz mono Int16)"
case .converterCreationFailed:
return "Failed to create audio converter from input to target format"
}
}
}
134 changes: 134 additions & 0 deletions examples/370-swift-ios-live-transcription/src/DeepgramClient.swift
@@ -0,0 +1,134 @@
import Foundation

// Deepgram live STT WebSocket client.
// There is no official Deepgram Swift SDK. This wraps the WebSocket API
// (wss://api.deepgram.com/v1/listen) with URLSessionWebSocketTask.
// If an official SDK is released, replace this file with SDK calls.

struct DeepgramTranscriptMessage: Decodable {
let type: String
let channel: Channel?
let isFinal: Bool?
let speechFinal: Bool?

enum CodingKeys: String, CodingKey {
case type, channel
case isFinal = "is_final"
case speechFinal = "speech_final"
}

struct Channel: Decodable {
let alternatives: [Alternative]
}

struct Alternative: Decodable {
let transcript: String
let confidence: Double
}
}

protocol DeepgramClientDelegate: AnyObject {
func deepgramDidConnect()
func deepgramDidDisconnect(error: Error?)
func deepgramDidReceiveTranscript(_ text: String, isFinal: Bool)
}

final class DeepgramClient {
weak var delegate: DeepgramClientDelegate?

private var webSocketTask: URLSessionWebSocketTask?
private let apiKey: String
// nova-3 is the current flagship STT model (2025)
private let model: String
private let sampleRate: Int
private let encoding: String

init(apiKey: String, model: String = "nova-3", sampleRate: Int = 16000, encoding: String = "linear16") {
self.apiKey = apiKey
self.model = model
self.sampleRate = sampleRate
self.encoding = encoding
}

func connect() {
// tag=deepgram-examples: labels example traffic in the Deepgram console
var components = URLComponents(string: "wss://api.deepgram.com/v1/listen")!
components.queryItems = [
URLQueryItem(name: "model", value: model),
URLQueryItem(name: "encoding", value: encoding),
URLQueryItem(name: "sample_rate", value: String(sampleRate)),
URLQueryItem(name: "channels", value: "1"),
URLQueryItem(name: "interim_results", value: "true"),
// utterance_end_ms makes Deepgram emit UtteranceEnd messages after this
// much trailing silence, marking utterance boundaries
URLQueryItem(name: "utterance_end_ms", value: "1000"),
URLQueryItem(name: "tag", value: "deepgram-examples"),
]

var request = URLRequest(url: components.url!)
// iOS URLSession supports custom headers on WebSocket (unlike browsers)
request.setValue("Token \(apiKey)", forHTTPHeaderField: "Authorization")

let session = URLSession(configuration: .default)
webSocketTask = session.webSocketTask(with: request)
webSocketTask?.resume()

// resume() starts the handshake but does not wait for it; for a precise
// "connected" signal, adopt URLSessionWebSocketDelegate and implement
// urlSession(_:webSocketTask:didOpenWithProtocol:). Here we report
// optimistically and surface handshake failures via the receive loop.
delegate?.deepgramDidConnect()
listenForMessages()
}

func sendAudio(_ data: Data) {
webSocketTask?.send(.data(data)) { error in
if let error = error {
print("WebSocket send error: \(error)")
}
}
}

func disconnect() {
// Send CloseStream message per Deepgram protocol to flush final results
let closeMessage = #"{"type": "CloseStream"}"#
webSocketTask?.send(.string(closeMessage)) { [weak self] _ in
self?.webSocketTask?.cancel(with: .normalClosure, reason: nil)
self?.webSocketTask = nil
self?.delegate?.deepgramDidDisconnect(error: nil)
}
}

private func listenForMessages() {
webSocketTask?.receive { [weak self] result in
switch result {
case .success(let message):
switch message {
case .string(let text):
self?.handleMessage(text)
case .data(let data):
if let text = String(data: data, encoding: .utf8) {
self?.handleMessage(text)
}
@unknown default:
break
}
// Keep listening for more messages
self?.listenForMessages()

case .failure(let error):
self?.delegate?.deepgramDidDisconnect(error: error)
}
}
}

private func handleMessage(_ text: String) {
guard let data = text.data(using: .utf8),
let message = try? JSONDecoder().decode(DeepgramTranscriptMessage.self, from: data),
message.type == "Results",
let transcript = message.channel?.alternatives.first?.transcript,
!transcript.isEmpty
else { return }

// is_final=true means Deepgram won't revise this segment further
let isFinal = message.isFinal ?? false
DispatchQueue.main.async { [weak self] in
self?.delegate?.deepgramDidReceiveTranscript(transcript, isFinal: isFinal)
}
}
}
@@ -0,0 +1,10 @@
import SwiftUI

@main
struct DeepgramLiveTranscriptionApp: App {
var body: some Scene {
WindowGroup {
TranscriptionView()
}
}
}
12 changes: 12 additions & 0 deletions examples/370-swift-ios-live-transcription/src/Info.plist
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>NSMicrophoneUsageDescription</key>
<string>This app needs microphone access to transcribe your speech in real-time using Deepgram.</string>
<key>UIBackgroundModes</key>
<array>
<string>audio</string>
</array>
</dict>
</plist>