From a2538547e49de913646170ea5f46e7b2093c202a Mon Sep 17 00:00:00 2001
From: Bartosz Hanc <bartosz.hanc02@gmail.com>
Date: Fri, 22 May 2026 17:14:19 +0200
Subject: [PATCH 1/3] fix(docs): update useSpeechToText and useVAD
 documentation

---
 .../useSpeechToText.md                         |  1 -
 .../01-natural-language-processing/useVAD.md   | 14 ++++++++++----
 .../SpeechToTextModule.md                      | 18 +++++++++---------
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
index 02d0008dda..5862f7d52f 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
@@ -207,7 +207,6 @@ The hook returns an object with:
 - `streamInsert(audio)`: Push audio to the stream buffer.
 - `streamStop()`: Finish the current stream.
 - `isGenerating`: Boolean indicating if the model is busy.
-- `loading`: Boolean indicating if the model is being loaded.
 
 ## Supported models
 
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useVAD.md b/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
index f05d53c10f..7aebdaec6d 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
@@ -56,8 +56,10 @@ You can fine-tune the streaming behavior via the `options` object:
 
 ```tsx
 import { useVAD, models } from 'react-native-executorch';
+import { AudioRecorder } from 'react-native-audio-api';
 
 const model = useVAD({ model: models.vad.fsmn_vad() });
+const recorder = new AudioRecorder();
 
 const startLiveVAD = async () => {
   // Start the continuous streaming listener
@@ -70,13 +72,17 @@ const startLiveVAD = async () => {
     },
   });
 
-  // Example: Hook into your audio recorder's data event
-  audioRecorder.on('data', (chunk: Float32Array) => {
-    model.streamInsert(chunk);
-  });
+  // Capture microphone input at 16kHz
+  recorder.onAudioReady(
+    { sampleRate: 16000, bufferLength: 1600, channelCount: 1 },
+    (chunk) => model.streamInsert(chunk.buffer.getChannelData(0))
+  );
+
+  await recorder.start();
 };
 
 const stopLiveVAD = () => {
+  recorder.stop();
   model.streamStop();
 };
 ```
diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/SpeechToTextModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/SpeechToTextModule.md
index 2e2597397d..989fc4fe6e 100644
--- a/docs/docs/04-typescript-api/01-natural-language-processing/SpeechToTextModule.md
+++ b/docs/docs/04-typescript-api/01-natural-language-processing/SpeechToTextModule.md
@@ -98,20 +98,20 @@ const model = await SpeechToTextModule.fromModelName(
 AudioManager.setAudioSessionOptions({
   iosCategory: 'playAndRecord',
   iosMode: 'spokenAudio',
-  iosOptions: ['allowBluetooth', 'defaultToSpeaker'],
+  iosOptions: ['allowBluetoothHFP', 'defaultToSpeaker'],
 });
 await AudioManager.requestRecordingPermissions();
 
 // 2. Setup Audio Recorder
-const recorder = new AudioRecorder({
-  sampleRate: 16000,
-  channelCount: 1,
-});
+const recorder = new AudioRecorder();
 
-recorder.onAudioReady((chunk) => {
-  // Feed chunks directly into the model's buffer
-  model.streamInsert(chunk.buffer.getChannelData(0));
-});
+recorder.onAudioReady(
+  { sampleRate: 16000, bufferLength: 1600, channelCount: 1 },
+  (chunk) => {
+    // Feed chunks directly into the model's buffer
+    model.streamInsert(chunk.buffer.getChannelData(0));
+  }
+);
 
 await recorder.start();
 

From e4d8cc8892f861e8235a396ac5544828c619f63f Mon Sep 17 00:00:00 2001
From: Bartosz Hanc <bartosz.hanc02@gmail.com>
Date: Fri, 22 May 2026 17:21:22 +0200
Subject: [PATCH 2/3] fix(docs): fix useSpeechToText and useVAD return types in
 docs

---
 .../useSpeechToText.md                        | 19 ++++++++++++-------
 .../01-natural-language-processing/useVAD.md  |  2 +-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
index 5862f7d52f..53869c6fd5 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
@@ -200,13 +200,18 @@ const result = await model.transcribe(audioBuffer, { verbose: true });
 
 ### Returns
 
-The hook returns an object with:
-
-- `transcribe(audio, options)`: One-shot transcription.
-- `stream(options)`: Async generator for streaming results.
-- `streamInsert(audio)`: Push audio to the stream buffer.
-- `streamStop()`: Finish the current stream.
-- `isGenerating`: Boolean indicating if the model is busy.
+The hook returns a [`SpeechToTextType`](../../06-api-reference/interfaces/SpeechToTextType.md) object containing:
+
+- `error`: `null | RnExecutorchError` - Contains the error object if the model failed to load, or `null` otherwise.
+- `isReady`: `boolean` - Indicates whether the model has successfully loaded and is ready for inference.
+- `isGenerating`: `boolean` - Indicates whether the model is currently processing an inference.
+- `downloadProgress`: `number` - Tracks the progress of the model download process as a value between `0` and `1`.
+- `transcribe(audio, options)`: Starts a transcription process for a given input array, which should be a waveform at 16kHz. Returns a promise resolving to a [`TranscriptionResult`](../../06-api-reference/interfaces/TranscriptionResult.md).
+- `stream(options)`: Starts a streaming transcription process. Returns an asynchronous generator that yields objects containing `committed` and `nonCommitted` transcriptions, both of type [`TranscriptionResult`](../../06-api-reference/interfaces/TranscriptionResult.md).
+- `streamInsert(audio)`: Inserts a chunk of audio data (sampled at 16kHz) into the ongoing streaming transcription.
+- `streamStop()`: Stops the ongoing streaming transcription process.
+- `encode(audio)`: Runs the encoding part of the model on the provided waveform. Returns a promise resolving to the encoded `Float32Array`.
+- `decode(tokens, encoderOutput)`: Runs the decoder of the model with the given tokens (`Int32Array`) and encoder output (`Float32Array`). Returns a promise resolving to the decoded `Float32Array`.
 
 ## Supported models
 
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useVAD.md b/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
index 7aebdaec6d..99d41f1608 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
@@ -90,7 +90,7 @@ const stopLiveVAD = () => {
 ### Arguments & Returns
 
 - **Arguments**: `useVAD` takes a [`VADProps`](../../06-api-reference/interfaces/VADProps.md) object containing the `model` and an optional `preventLoad` flag.
-- **Returns**: A [`VADType`](../../06-api-reference/interfaces/VADType.md) object providing `forward`, `stream`, `streamInsert`, and `streamStop` methods, along with `isReady` and `error` states.
+- **Returns**: A [`VADType`](../../06-api-reference/interfaces/VADType.md) object providing `forward`, `stream`, `streamInsert`, and `streamStop` methods, along with `error`, `isReady`, `isGenerating`, and `downloadProgress` states.
 
 ## Supported models
 

From 53bb38f46bb6e085fd0c8a60cd9c4c56b433abdb Mon Sep 17 00:00:00 2001
From: Bartosz Hanc <bartosz.hanc02@gmail.com>
Date: Fri, 22 May 2026 17:25:11 +0200
Subject: [PATCH 3/3] fix(docs): update TextToSpeechModule examples to include
 streamInsert usage

---
 .../01-natural-language-processing/TextToSpeechModule.md    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
index daf5cb735b..fb248baecc 100644
--- a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
+++ b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
@@ -100,8 +100,8 @@ const tts = await TextToSpeechModule.fromModelName(
 const audioContext = new AudioContext({ sampleRate: 24000 });
 
 try {
+  tts.streamInsert('This is a streaming test, with a sample input.');
   for await (const chunk of tts.stream({
-    text: 'This is a streaming test, with a sample input.',
     speed: 1.0,
   })) {
     // Play each chunk sequentially
@@ -135,8 +135,10 @@ const tts = await TextToSpeechModule.fromModelName(
 const waveform = await tts.forward('həlˈO wˈɜɹld!', 1.0, false);
 
 // Or stream from phonemes
+tts.streamInsert(
+  'ɐ mˈæn hˌu dˈʌzᵊnt tɹˈʌst hɪmsˈɛlf, kæn nˈɛvəɹ ɹˈiᵊli tɹˈʌst ˈɛniwˌʌn ˈɛls.'
+);
 for await (const chunk of tts.stream({
-  text: 'ɐ mˈæn hˌu dˈʌzᵊnt tɹˈʌst hɪmsˈɛlf, kæn nˈɛvəɹ ɹˈiᵊli tɹˈʌst ˈɛniwˌʌn ˈɛls.',
   speed: 1.0,
   phonemize: false,
 })) {