Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
de92511
Add CoreML whisper models
IgorSwat May 7, 2026
0162b0c
Update urls & audio-api
IgorSwat May 14, 2026
efe744b
Add CoreML whisper models
IgorSwat May 7, 2026
bc334df
Implement VAD streaming
IgorSwat May 10, 2026
c52247d
Integrate VAD with STT
IgorSwat May 11, 2026
d495d54
Fix wrong include issue
IgorSwat May 14, 2026
535c15c
Rebase with other PR changes
IgorSwat May 20, 2026
ef5e7ca
Bump audio-api version
IgorSwat May 20, 2026
c10ac15
Apply review suggestions
IgorSwat May 21, 2026
9fb27e6
Fix demo app keyboard behavior
IgorSwat May 21, 2026
f6e774c
Update demos & change default STT model for iOS simulator
IgorSwat May 21, 2026
89bdd17
Apply review suggestions
IgorSwat May 22, 2026
ae72c0d
fetch latest phonemis version
IgorSwat May 22, 2026
3843755
fix(vad): guard audioBuffer_.size() with mutex in stream loop
msluszniak May 22, 2026
06aac93
fix(vad): copy stream snapshot under lock to avoid dangling span
msluszniak May 22, 2026
e3e4d2b
fix(vad): use std::max when extending merged segment end
msluszniak May 22, 2026
069cf74
fix(vad): reset isStreaming in finally on natural stream() finish
msluszniak May 22, 2026
ffc6513
refactor(stt): copy options before disabling VAD for finish()
msluszniak May 22, 2026
c1af612
refactor(vad): make streamInsert take std::span<const float>
msluszniak May 22, 2026
30e7fb6
fix(stt): use ?? for vadDetectionMargin default
msluszniak May 22, 2026
6cac582
docs(stt): document kVadGapFactor
msluszniak May 22, 2026
7ec8f9b
style(vad): add trailing newline to Constants.h
msluszniak May 22, 2026
1788e9a
style(vad): add trailing newline to Utils.h
msluszniak May 22, 2026
f4f4ef7
fix(stt): static_assert kVadDeadSamplesRemovalSamples > safety margin
msluszniak May 22, 2026
57b7373
test(stt): update SpeechToTextTests for the new 5-arg constructor
msluszniak May 22, 2026
3609232
test(vad): add mergeSegments unit tests and stream lifecycle tests
msluszniak May 22, 2026
e65731e
test(stt): add VAD integration coverage
msluszniak May 22, 2026
44b4a39
fix(jsi): add std::span<const float> conversion specialization
msluszniak May 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/llm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"metro-config": "^0.83.0",
"react": "19.2.5",
"react-native": "0.83.4",
"react-native-audio-api": "0.12.0",
"react-native-audio-api": "0.12.2",
"react-native-device-info": "^15.0.2",
"react-native-executorch": "workspace:*",
"react-native-executorch-expo-resource-fetcher": "workspace:*",
Expand Down
18 changes: 17 additions & 1 deletion apps/speech/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import React, { useState } from 'react';
import { View, Text, StyleSheet, TouchableOpacity } from 'react-native';
import { TextToSpeechScreen } from './screens/TextToSpeechScreen';
import { SpeechToTextScreen } from './screens/SpeechToTextScreen';
import { VoiceActivityDetectionScreen } from './screens/VoiceActivityDetectionScreen';
import ColorPalette from './colors';
import ExecutorchLogo from './assets/executorch.svg';
import { Quiz } from './screens/Quiz';
Expand All @@ -15,7 +16,12 @@ initExecutorch({

export default function App() {
const [currentScreen, setCurrentScreen] = useState<
'menu' | 'speech-to-text' | 'text-to-speech' | 'quiz' | 'text-to-speech-llm'
| 'menu'
| 'speech-to-text'
| 'text-to-speech'
| 'quiz'
| 'text-to-speech-llm'
| 'vad'
>('menu');

const goToMenu = () => setCurrentScreen('menu');
Expand All @@ -28,6 +34,10 @@ export default function App() {
return <SpeechToTextScreen onBack={goToMenu} />;
}

if (currentScreen === 'vad') {
return <VoiceActivityDetectionScreen onBack={goToMenu} />;
}

if (currentScreen === 'quiz') {
return <Quiz onBack={goToMenu} />;
}
Expand All @@ -47,6 +57,12 @@ export default function App() {
>
<Text style={styles.buttonText}>Speech to Text</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.button}
onPress={() => setCurrentScreen('vad')}
>
<Text style={styles.buttonText}>Voice Activity Detection</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.button}
onPress={() => setCurrentScreen('text-to-speech')}
Expand Down
144 changes: 123 additions & 21 deletions apps/speech/screens/SpeechToTextScreen.tsx
Comment thread
IgorSwat marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
KeyboardAvoidingView,
Platform,
Switch,
Keyboard,
} from 'react-native';
import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
import {
Expand All @@ -19,6 +20,7 @@ import {
} from 'react-native-executorch';
import { ModelPicker, ModelOption } from '../components/ModelPicker';
const speechToText = models.speech_to_text;
const vad = models.vad;

type STTModelSources = SpeechToTextProps['model'];

Expand Down Expand Up @@ -51,6 +53,7 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {

const model = useSpeechToText({
model: selectedModel,
vad: vad.fsmn_vad(),
});

const [transcription, setTranscription] =
Expand All @@ -65,6 +68,7 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
} | null>(null);

const [enableTimestamps, setEnableTimestamps] = useState(false);
const [useVAD, setUseVAD] = useState(true);
const [error, setError] = useState<string | null>(null);
const [audioURL, setAudioURL] = useState('');
const [hasMicPermission, setHasMicPermission] = useState(false);
Expand Down Expand Up @@ -104,11 +108,15 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
}

const handleTranscribeFromURL = async () => {
if (!audioURL.trim()) {
console.warn('Please provide a valid audio file URL');
if (!audioURL.trim() || model.isGenerating) {
if (!audioURL.trim()) {
console.warn('Please provide a valid audio file URL');
}
return;
}

Keyboard.dismiss();

// Reset previous states
setTranscription(null);
setLiveResult(null);
Expand All @@ -131,8 +139,10 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
};

const handleStartTranscribeFromMicrophone = async () => {
if (!hasMicPermission) {
setError('Microphone permission denied. Please enable it in Settings.');
if (!hasMicPermission || model.isGenerating || liveTranscribing) {
if (!hasMicPermission) {
setError('Microphone permission denied. Please enable it in Settings.');
}
return;
}

Expand Down Expand Up @@ -177,7 +187,9 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
try {
const streamIter = model.stream({
verbose: enableTimestamps,
timeout: 100,
timeout: 200,
useVAD: useVAD,
vadDetectionMargin: 1200,
});

for await (const { committed, nonCommitted } of streamIter) {
Expand Down Expand Up @@ -352,22 +364,64 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
<Text style={styles.buttonText}> Stop Live Transcription</Text>
</TouchableOpacity>
) : (
<TouchableOpacity
disabled={recordingButtonDisabled}
onPress={handleStartTranscribeFromMicrophone}
style={[
styles.liveTranscriptionButton,
styles.backgroundBlue,
recordingButtonDisabled && styles.disabled,
]}
>
<FontAwesome name="microphone" size={20} color="white" />
<Text style={styles.buttonText}>
{isSimulator
? 'Recording is not available on Simulator'
: 'Start Live Transcription'}
</Text>
</TouchableOpacity>
<View style={styles.buttonRow}>
<TouchableOpacity
disabled={recordingButtonDisabled}
onPress={handleStartTranscribeFromMicrophone}
style={[
styles.liveTranscriptionButton,
styles.backgroundBlue,
styles.flex1,
recordingButtonDisabled && styles.disabled,
]}
>
<FontAwesome name="microphone" size={20} color="white" />
<Text style={styles.buttonText}>
{isSimulator ? 'No Mic' : 'Start Live'}
</Text>
</TouchableOpacity>

<TouchableOpacity
onPress={() => setUseVAD(!useVAD)}
activeOpacity={0.7}
accessibilityRole="switch"
accessibilityState={{ checked: useVAD }}
accessibilityLabel={`Voice Activity Detection ${useVAD ? 'on' : 'off'}`}
style={[
styles.vadButton,
useVAD ? styles.vadActive : styles.vadInactive,
recordingButtonDisabled && styles.disabled,
]}
>
<FontAwesome
name={useVAD ? 'check-circle' : 'circle-o'}
size={18}
color={useVAD ? '#ffffff' : '#94a3b8'}
/>
<View style={styles.vadTextContainer}>
<Text
style={[
styles.vadButtonLabel,
useVAD
? styles.vadButtonLabelActive
: styles.vadButtonLabelInactive,
]}
>
VAD
</Text>
<Text
style={[
styles.vadButtonState,
useVAD
? styles.vadButtonStateActive
: styles.vadButtonStateInactive,
]}
>
{useVAD ? 'ON' : 'OFF'}
</Text>
</View>
</TouchableOpacity>
</View>
)}
</View>
</KeyboardAvoidingView>
Expand Down Expand Up @@ -492,6 +546,54 @@ const styles = StyleSheet.create({
backgroundBlue: {
backgroundColor: '#0f186e',
},
buttonRow: {
flexDirection: 'row',
gap: 8,
marginTop: 12,
},
flex1: {
flex: 1,
marginTop: 0,
},
vadButton: {
flexDirection: 'row',
alignItems: 'center',
justifyContent: 'center',
paddingHorizontal: 14,
borderRadius: 12,
gap: 10,
},
vadActive: {
backgroundColor: '#0f186e',
},
vadInactive: {
backgroundColor: '#f1f5f9',
},
vadTextContainer: {
alignItems: 'flex-start',
},
vadButtonLabel: {
fontWeight: '800',
fontSize: 13,
letterSpacing: 0.5,
},
vadButtonLabelActive: {
color: 'white',
},
vadButtonLabelInactive: {
color: '#64748b',
},
vadButtonState: {
fontWeight: '700',
fontSize: 10,
letterSpacing: 1,
},
vadButtonStateActive: {
color: '#bbf7d0',
},
vadButtonStateInactive: {
color: '#94a3b8',
},
disabled: {
opacity: 0.5,
},
Expand Down
2 changes: 2 additions & 0 deletions apps/speech/screens/TextToSpeechScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
TextInput,
KeyboardAvoidingView,
Platform,
Keyboard,
} from 'react-native';
import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
import {
Expand Down Expand Up @@ -124,6 +125,7 @@ export const TextToSpeechScreen = ({ onBack }: { onBack: () => void }) => {
return;
}

Keyboard.dismiss();
setIsPlaying(true);

try {
Expand Down
Loading
Loading