From 1b656cf8b0a86333c693d969614c0da90fa0273d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Thu, 19 Mar 2026 14:05:45 +0100 Subject: [PATCH 1/9] chore: Add basic statistics to demo apps --- .../app/classification/index.tsx | 5 ++ .../app/object_detection/index.tsx | 8 +++ apps/computer-vision/app/ocr/index.tsx | 8 +++ .../app/ocr_vertical/index.tsx | 8 +++ .../app/semantic_segmentation/index.tsx | 5 ++ .../app/style_transfer/index.tsx | 5 ++ .../app/text_to_image/index.tsx | 26 ++++++++++ apps/computer-vision/components/StatsBar.tsx | 44 ++++++++++++++++ apps/llm/app/llm/index.tsx | 6 ++- apps/llm/app/llm_structured_output/index.tsx | 8 ++- apps/llm/app/llm_tool_calling/index.tsx | 6 ++- apps/llm/app/multimodal_llm/index.tsx | 5 ++ apps/llm/components/StatsBar.tsx | 40 +++++++++++++++ apps/llm/hooks/useLLMStats.ts | 50 +++++++++++++++++++ apps/speech/screens/SpeechToTextScreen.tsx | 16 ++++++ .../app/clip-embeddings/index.tsx | 19 +++++++ .../app/text-embeddings/index.tsx | 16 ++++++ 17 files changed, 271 insertions(+), 4 deletions(-) create mode 100644 apps/computer-vision/components/StatsBar.tsx create mode 100644 apps/llm/components/StatsBar.tsx create mode 100644 apps/llm/hooks/useLLMStats.ts diff --git a/apps/computer-vision/app/classification/index.tsx b/apps/computer-vision/app/classification/index.tsx index 97dde1727d..c22e2fcd4b 100644 --- a/apps/computer-vision/app/classification/index.tsx +++ b/apps/computer-vision/app/classification/index.tsx @@ -9,12 +9,14 @@ import { BottomBar } from '../../components/BottomBar'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; +import { StatsBar } from '../../components/StatsBar'; export default function ClassificationScreen() { const [results, setResults] = useState<{ label: string; score: number }[]>( [] ); const [imageUri, setImageUri] = useState(''); + const 
[inferenceTime, setInferenceTime] = useState(null); const model = useClassification({ model: EFFICIENTNET_V2_S_QUANTIZED }); const { setGlobalGenerating } = useContext(GeneratingContext); @@ -34,7 +36,9 @@ export default function ClassificationScreen() { const runForward = async () => { if (imageUri) { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); const top10 = Object.entries(output) .sort(([, a], [, b]) => (b as number) - (a as number)) .slice(0, 10) @@ -80,6 +84,7 @@ export default function ClassificationScreen() { )} + [] = [ { label: 'RF-DeTR Nano', value: RF_DETR_NANO }, @@ -29,6 +30,7 @@ export default function ObjectDetectionScreen() { }>(); const [selectedModel, setSelectedModel] = useState(RF_DETR_NANO); + const [inferenceTime, setInferenceTime] = useState(null); const model = useObjectDetection({ model: selectedModel }); const { setGlobalGenerating } = useContext(GeneratingContext); @@ -52,7 +54,9 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); setResults(output); } catch (e) { console.error(e); @@ -100,6 +104,10 @@ export default function ObjectDetectionScreen() { setResults([]); }} /> + 0 ? results.length : null} + /> (); const [selectedModel, setSelectedModel] = useState(OCR_ENGLISH); + const [inferenceTime, setInferenceTime] = useState(null); const model = useOCR({ model: selectedModel, @@ -63,7 +65,9 @@ export default function OCRScreen() { const runForward = async () => { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); setResults(output); } catch (e) { console.error(e); @@ -123,6 +127,10 @@ export default function OCRScreen() { setResults([]); }} /> + 0 ? 
results.length : null} + /> (); + const [inferenceTime, setInferenceTime] = useState(null); const model = useVerticalOCR({ model: OCR_ENGLISH, independentCharacters: true, @@ -38,7 +40,9 @@ export default function VerticalOCRScree() { const runForward = async () => { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); setResults(output); } catch (e) { console.error(e); @@ -89,6 +93,10 @@ export default function VerticalOCRScree() { )} + 0 ? results.length : null} + /> (null); const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 }); + const [inferenceTime, setInferenceTime] = useState(null); useEffect(() => { setGlobalGenerating(isGenerating); @@ -91,6 +93,7 @@ export default function SemanticSegmentationScreen() { const runForward = async () => { if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return; try { + const start = Date.now(); const { width, height } = imageSize; const output = await forward(imageUri, [], true); const argmax = output.ARGMAX || []; @@ -119,6 +122,7 @@ export default function SemanticSegmentationScreen() { width * 4 ); setSegImage(img); + setInferenceTime(Date.now() - start); } catch (e) { console.error(e); } @@ -179,6 +183,7 @@ export default function SemanticSegmentationScreen() { setSegImage(null); }} /> + (null); const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); @@ -55,7 +57,9 @@ export default function StyleTransferScreen() { const runForward = async () => { if (imageUri) { try { + const start = Date.now(); const uri = await model.forward(imageUri, 'url'); + setInferenceTime(Date.now() - start); setStyledUri(uri); } catch (e) { console.error(e); @@ -96,6 +100,7 @@ export default function StyleTransferScreen() { setStyledUri(''); }} /> + [] = [ { label: 'BK-SDM 256', value: BK_SDM_TINY_VPRED_256 }, { label: 'BK-SDM 512', value: BK_SDM_TINY_VPRED_512 }, ]; +import { BottomBarWithTextInput } from 
'../../components/BottomBarWithTextInput'; +import { StatsBar } from '../../components/StatsBar'; export default function TextToImageScreen() { const [inferenceStepIdx, setInferenceStepIdx] = useState(0); @@ -39,6 +41,9 @@ export default function TextToImageScreen() { const [selectedModel, setSelectedModel] = useState( BK_SDM_TINY_VPRED_256 ); + const [generationTime, setGenerationTime] = useState(null); + const [showTextInput, setShowTextInput] = useState(false); + const [keyboardVisible, setKeyboardVisible] = useState(false); const imageSize = 224; const model = useTextToImage({ @@ -55,8 +60,15 @@ export default function TextToImageScreen() { const runForward = async () => { if (!input.trim()) return; try { + const start = Date.now(); const output = await model.generate(input, imageSize, steps); if (output.length) setImage(output); + else { + setImageTitle(prevImageTitle); + return; + } + setGenerationTime(Date.now() - start); + setImage(output); } catch (e) { console.error(e); } finally { @@ -134,6 +146,20 @@ export default function TextToImageScreen() { onSubmitEditing={runForward} returnKeyType="send" /> + + + + {model.isGenerating ? ( + Inference: {inferenceTime} ms + {detectionCount != null && ( + <> + · + + {detectionCount} detection{detectionCount !== 1 ? 
's' : ''} + + + )} + + ); +} + +const styles = StyleSheet.create({ + container: { + flexDirection: 'row', + justifyContent: 'center', + alignItems: 'center', + gap: 8, + paddingVertical: 6, + }, + stat: { + fontSize: 13, + color: '#334155', + fontWeight: '500', + }, + separator: { + fontSize: 13, + color: '#94A3B8', + }, +}); diff --git a/apps/llm/app/llm/index.tsx b/apps/llm/app/llm/index.tsx index ab8b8e84db..a5e7806f12 100644 --- a/apps/llm/app/llm/index.tsx +++ b/apps/llm/app/llm/index.tsx @@ -28,6 +28,8 @@ const SUGGESTED_PROMPTS = [ 'What are the benefits of on-device AI?', 'Give me 3 fun facts about space', ]; +import { useLLMStats } from '../../hooks/useLLMStats'; +import { StatsBar } from '../../components/StatsBar'; export default function LLMScreenWrapper() { const isFocused = useIsFocused(); @@ -44,6 +46,7 @@ function LLMScreen() { const { setGlobalGenerating } = useContext(GeneratingContext); const llm = useLLM({ model: selectedModel }); + const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating); useEffect(() => { if (llm.error) { @@ -56,6 +59,7 @@ function LLMScreen() { }, [llm.isGenerating, setGlobalGenerating]); const sendMessage = async () => { + onMessageSend(); setUserInput(''); textInputRef.current?.clear(); try { @@ -109,7 +113,7 @@ function LLMScreen() { onSelect={(m) => setSelectedModel(m)} disabled={llm.isGenerating} /> - + (null); const { setGlobalGenerating } = useContext(GeneratingContext); - const llm = useLLM({ model: selectedModel }); // try out 4B model if 1.7B struggles with following structured output + const llm = useLLM({ model: selectedModel }); + const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating); useEffect(() => { setGlobalGenerating(llm.isGenerating); @@ -136,6 +139,7 @@ function LLMScreen() { }, [llm.error]); const sendMessage = async () => { + onMessageSend(); setUserInput(''); textInputRef.current?.clear(); try { @@ -190,7 +194,7 @@ function LLMScreen() { onSelect={(m) => 
setSelectedModel(m)} disabled={llm.isGenerating} /> - + { setGlobalGenerating(llm.isGenerating); @@ -158,6 +161,7 @@ function LLMToolCallingScreen() { }, []); const sendMessage = async () => { + onMessageSend(); setUserInput(''); textInputRef.current?.clear(); try { @@ -235,7 +239,7 @@ function LLMToolCallingScreen() { onSelect={(m) => setSelectedModel(m)} disabled={llm.isGenerating} /> - + { setGlobalGenerating(vlm.isGenerating); @@ -63,6 +66,7 @@ function MultimodalLLMScreen() { const sendMessage = async () => { if (!userInput.trim() || vlm.isGenerating) return; + onMessageSend(); const text = userInput.trim(); setUserInput(''); textInputRef.current?.clear(); @@ -138,6 +142,7 @@ function MultimodalLLMScreen() { )} + {/* Image picker button */} + TTFT: {stats.ttft} ms + · + {stats.tokensPerSec} tok/s + · + ~{stats.totalTokens} tokens + + ); +} + +const styles = StyleSheet.create({ + container: { + flexDirection: 'row', + justifyContent: 'center', + alignItems: 'center', + gap: 8, + paddingVertical: 6, + }, + stat: { + fontSize: 13, + color: '#334155', + fontWeight: '500', + }, + separator: { + fontSize: 13, + color: '#94A3B8', + }, +}); diff --git a/apps/llm/hooks/useLLMStats.ts b/apps/llm/hooks/useLLMStats.ts new file mode 100644 index 0000000000..b798947199 --- /dev/null +++ b/apps/llm/hooks/useLLMStats.ts @@ -0,0 +1,50 @@ +import { useEffect, useRef, useState } from 'react'; + +export interface LLMStats { + ttft: number; + tokensPerSec: number; + totalTokens: number; +} + +export function useLLMStats(response: string, isGenerating: boolean) { + const sendTimeRef = useRef(null); + const firstTokenTimeRef = useRef(null); + const lastResponseRef = useRef(''); + const [stats, setStats] = useState(null); + + useEffect(() => { + if (isGenerating && response.length > 0) { + lastResponseRef.current = response; + if (firstTokenTimeRef.current === null && sendTimeRef.current !== null) { + firstTokenTimeRef.current = Date.now(); + } + } + }, [response, isGenerating]); + + 
useEffect(() => { + if ( + !isGenerating && + sendTimeRef.current !== null && + firstTokenTimeRef.current !== null + ) { + const endTime = Date.now(); + const ttft = firstTokenTimeRef.current - sendTimeRef.current; + const totalTime = (endTime - firstTokenTimeRef.current) / 1000; + const totalTokens = Math.round(lastResponseRef.current.length / 4); + const tokensPerSec = + totalTime > 0 ? Math.round(totalTokens / totalTime) : 0; + setStats({ ttft, tokensPerSec, totalTokens }); + sendTimeRef.current = null; + firstTokenTimeRef.current = null; + } + }, [isGenerating]); + + const onMessageSend = () => { + sendTimeRef.current = Date.now(); + firstTokenTimeRef.current = null; + lastResponseRef.current = ''; + setStats(null); + }; + + return { stats, onMessageSend }; +} diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx index 354431b855..7fdf7f392f 100644 --- a/apps/speech/screens/SpeechToTextScreen.tsx +++ b/apps/speech/screens/SpeechToTextScreen.tsx @@ -54,6 +54,9 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { const [transcription, setTranscription] = useState(null); + const [transcriptionTime, setTranscriptionTime] = useState( + null + ); const [liveResult, setLiveResult] = useState<{ fullText: string; @@ -113,9 +116,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { try { const decodedAudioData = await audioContext.decodeAudioData(uri); const audioBuffer = decodedAudioData.getChannelData(0); + const start = Date.now(); const result = await model.transcribe(audioBuffer, { verbose: enableTimestamps, }); + setTranscriptionTime(Date.now() - start); setTranscription(result); } catch (error) { console.error('Error decoding audio data', error); @@ -252,6 +257,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { Status: {getModelStatus()} + {transcriptionTime !== null && ( + + Transcription: {transcriptionTime} ms + + )} ([]); + const 
[embeddingTime, setEmbeddingTime] = useState(null); useEffect( () => { @@ -75,7 +76,9 @@ function ClipEmbeddingsScreen() { if (!textModel.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const inputEmbedding = await textModel.forward(inputSentence); + setEmbeddingTime(Date.now() - start); const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ sentence, @@ -93,7 +96,9 @@ function ClipEmbeddingsScreen() { if (!textModel.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const embedding = await textModel.forward(inputSentence); + setEmbeddingTime(Date.now() - start); setSentencesWithEmbeddings((prev) => [ ...prev, { sentence: inputSentence, embedding }, @@ -123,9 +128,12 @@ function ClipEmbeddingsScreen() { return; try { + const start = Date.now(); + // Array.from to get numbers[] const inputImageEmbedding = await imageModel.forward( output.assets[0].uri ); + setEmbeddingTime(Date.now() - start); const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ @@ -263,6 +271,11 @@ function ClipEmbeddingsScreen() { + {embeddingTime !== null && ( + + Embedding time: {embeddingTime} ms + + )} {topMatches.length > 0 && ( Top Matches @@ -375,6 +388,12 @@ const styles = StyleSheet.create({ topMatchesContainer: { marginTop: 20, }, + statsText: { + fontSize: 13, + color: '#64748B', + marginTop: 8, + textAlign: 'center', + }, flexContainer: { flex: 1, }, diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index 3e43004dde..40809a8e9b 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -31,6 +31,7 @@ function TextEmbeddingsScreen() { const [topMatches, setTopMatches] = useState< { sentence: string; similarity: number }[] >([]); + const [embeddingTime, setEmbeddingTime] = useState(null); useEffect( () => { @@ -66,7 +67,9 @@ function TextEmbeddingsScreen() { if 
(!model.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const inputEmbedding = await model.forward(inputSentence); + setEmbeddingTime(Date.now() - start); const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ sentence, @@ -84,7 +87,9 @@ function TextEmbeddingsScreen() { if (!model.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const embedding = await model.forward(inputSentence); + setEmbeddingTime(Date.now() - start); setSentencesWithEmbeddings((prev) => [ ...prev, { sentence: inputSentence, embedding }, @@ -217,6 +222,11 @@ function TextEmbeddingsScreen() { + {embeddingTime !== null && ( + + Embedding time: {embeddingTime} ms + + )} {topMatches.length > 0 && ( Top Matches @@ -329,6 +339,12 @@ const styles = StyleSheet.create({ topMatchesContainer: { marginTop: 20, }, + statsText: { + fontSize: 13, + color: '#64748B', + marginTop: 8, + textAlign: 'center', + }, flexContainer: { flex: 1, }, From cbabf515c5a2176f1e06cd7e74a1be7bbc6f652c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Fri, 20 Mar 2026 14:07:45 +0100 Subject: [PATCH 2/9] chore: handle rebased, magled code of app in text to image --- .../app/text_to_image/index.tsx | 33 +++++-------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/apps/computer-vision/app/text_to_image/index.tsx b/apps/computer-vision/app/text_to_image/index.tsx index 34d575955b..b5e80bab63 100644 --- a/apps/computer-vision/app/text_to_image/index.tsx +++ b/apps/computer-vision/app/text_to_image/index.tsx @@ -23,6 +23,7 @@ import { GeneratingContext } from '../../context'; import ColorPalette from '../../colors'; import ProgressBar from '../../components/ProgressBar'; import { Ionicons } from '@expo/vector-icons'; +import { StatsBar } from '../../components/StatsBar'; type TextToImageModelSources = TextToImageProps['model']; @@ -30,8 +31,6 @@ const MODELS: ModelOption[] = [ { label: 'BK-SDM 256', value: 
BK_SDM_TINY_VPRED_256 }, { label: 'BK-SDM 512', value: BK_SDM_TINY_VPRED_512 }, ]; -import { BottomBarWithTextInput } from '../../components/BottomBarWithTextInput'; -import { StatsBar } from '../../components/StatsBar'; export default function TextToImageScreen() { const [inferenceStepIdx, setInferenceStepIdx] = useState(0); @@ -42,8 +41,6 @@ export default function TextToImageScreen() { BK_SDM_TINY_VPRED_256 ); const [generationTime, setGenerationTime] = useState(null); - const [showTextInput, setShowTextInput] = useState(false); - const [keyboardVisible, setKeyboardVisible] = useState(false); const imageSize = 224; const model = useTextToImage({ @@ -62,13 +59,11 @@ export default function TextToImageScreen() { try { const start = Date.now(); const output = await model.generate(input, imageSize, steps); - if (output.length) setImage(output); - else { - setImageTitle(prevImageTitle); - return; + + if (output.length) { + setImage(output); + setGenerationTime(Date.now() - start); } - setGenerationTime(Date.now() - start); - setImage(output); } catch (e) { console.error(e); } finally { @@ -117,6 +112,7 @@ export default function TextToImageScreen() { onSelect={(m) => { setSelectedModel(m); setImage(null); + setGenerationTime(null); }} /> @@ -136,6 +132,9 @@ export default function TextToImageScreen() { + {/* Added StatsBar here, just above the input row */} + + - - - - {model.isGenerating ? 
( Date: Fri, 20 Mar 2026 18:18:15 +0100 Subject: [PATCH 3/9] Fix T2S streaming on unfinished sentences --- .../hooks/natural_language_processing/useTextToSpeech.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts index b5e03ceb59..471dc2a2a1 100644 --- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts +++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts @@ -119,7 +119,12 @@ export const useTextToSpeech = ({ setIsGenerating(true); try { if (input.text) { - instance.streamInsert(input.text); + // If the initial text does not end with an end of sentence character, + // we add an artificial dot to improve output's quality. + instance.streamInsert( + input.text + + ('.?!;'.includes(input.text.trim().slice(-1)) ? 
'' : '.') + ); } await input.onBegin?.(); From 7bc532e3a28078cefdd61d02358837ef5647bdd1 Mon Sep 17 00:00:00 2001 From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> Date: Sat, 21 Mar 2026 12:24:04 +0100 Subject: [PATCH 4/9] Apply suggestions from code review Co-authored-by: Bartosz Hanc --- apps/computer-vision/app/text_to_image/index.tsx | 1 - apps/text-embeddings/app/clip-embeddings/index.tsx | 1 - 2 files changed, 2 deletions(-) diff --git a/apps/computer-vision/app/text_to_image/index.tsx b/apps/computer-vision/app/text_to_image/index.tsx index b5e80bab63..9b007be998 100644 --- a/apps/computer-vision/app/text_to_image/index.tsx +++ b/apps/computer-vision/app/text_to_image/index.tsx @@ -132,7 +132,6 @@ export default function TextToImageScreen() { - {/* Added StatsBar here, just above the input row */} diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx index a373d799d0..b97d18ca87 100644 --- a/apps/text-embeddings/app/clip-embeddings/index.tsx +++ b/apps/text-embeddings/app/clip-embeddings/index.tsx @@ -129,7 +129,6 @@ function ClipEmbeddingsScreen() { try { const start = Date.now(); - // Array.from to get numbers[] const inputImageEmbedding = await imageModel.forward( output.assets[0].uri ); From 64d4f441765f1ed05c65d6f2f0fc8f6425391349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Sat, 21 Mar 2026 12:52:41 +0100 Subject: [PATCH 5/9] chore: add model selection and basic statistics to instance segmentation --- .../app/instance_segmentation/index.tsx | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/apps/computer-vision/app/instance_segmentation/index.tsx b/apps/computer-vision/app/instance_segmentation/index.tsx index e49594eb0a..f833ffa24e 100644 --- a/apps/computer-vision/app/instance_segmentation/index.tsx +++ b/apps/computer-vision/app/instance_segmentation/index.tsx @@ -1,7 +1,17 @@ import Spinner from 
'../../components/Spinner'; import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; -import { useInstanceSegmentation, YOLO26N_SEG } from 'react-native-executorch'; +import { ModelPicker, ModelOption } from '../../components/ModelPicker'; +import { + useInstanceSegmentation, + YOLO26N_SEG, + YOLO26S_SEG, + YOLO26M_SEG, + YOLO26L_SEG, + YOLO26X_SEG, + RF_DETR_NANO_SEG, + InstanceSegmentationModelSources, +} from 'react-native-executorch'; import { View, StyleSheet, @@ -16,8 +26,22 @@ import ImageWithMasks, { buildDisplayInstances, DisplayInstance, } from '../../components/ImageWithMasks'; +import { StatsBar } from '../../components/StatsBar'; + +const MODELS: ModelOption[] = [ + { label: 'Yolo26N', value: YOLO26N_SEG }, + { label: 'Yolo26S', value: YOLO26S_SEG }, + { label: 'Yolo26M', value: YOLO26M_SEG }, + { label: 'Yolo26L', value: YOLO26L_SEG }, + { label: 'Yolo26X', value: YOLO26X_SEG }, + { label: 'RF-DeTR Nano', value: RF_DETR_NANO_SEG }, +]; export default function InstanceSegmentationScreen() { + const [selectedModel, setSelectedModel] = + useState(YOLO26N_SEG); + const [inferenceTime, setInferenceTime] = useState(null); + const { setGlobalGenerating } = useContext(GeneratingContext); const { @@ -28,7 +52,7 @@ export default function InstanceSegmentationScreen() { error, getAvailableInputSizes, } = useInstanceSegmentation({ - model: YOLO26N_SEG, + model: selectedModel, }); const [imageUri, setImageUri] = useState(''); @@ -60,12 +84,14 @@ export default function InstanceSegmentationScreen() { height: image.height ?? 0, }); setInstances([]); + setInferenceTime(null); }; const runForward = async () => { if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return; try { + const start = Date.now(); const output = await forward(imageUri, { confidenceThreshold: 0.5, iouThreshold: 0.55, @@ -74,6 +100,8 @@ export default function InstanceSegmentationScreen() { inputSize: selectedInputSize ?? 
undefined, }); + setInferenceTime(Date.now() - start); + // Convert raw masks → small Skia images immediately. // Raw Uint8Array mask buffers (backed by native OwningArrayBuffer) // go out of scope here and become eligible for GC right away. @@ -168,6 +196,22 @@ export default function InstanceSegmentationScreen() { )} + { + setSelectedModel(m); + setInstances([]); + setInferenceTime(null); + }} + /> + + 0 ? instances.length : null} + /> + Date: Sat, 21 Mar 2026 13:13:08 +0100 Subject: [PATCH 6/9] chore: split inference time in image embeddings example --- .../app/clip-embeddings/index.tsx | 83 ++++++++----------- 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx index b97d18ca87..e9831c6be8 100644 --- a/apps/text-embeddings/app/clip-embeddings/index.tsx +++ b/apps/text-embeddings/app/clip-embeddings/index.tsx @@ -40,7 +40,13 @@ function ClipEmbeddingsScreen() { const [topMatches, setTopMatches] = useState< { sentence: string; similarity: number }[] >([]); - const [embeddingTime, setEmbeddingTime] = useState(null); + + const [textEmbeddingTime, setTextEmbeddingTime] = useState( + null + ); + const [imageEmbeddingTime, setImageEmbeddingTime] = useState( + null + ); useEffect( () => { @@ -55,11 +61,15 @@ function ClipEmbeddingsScreen() { ]; try { + const start = Date.now(); const embeddings = []; + for (const sentence of sentences) { const embedding = await textModel.forward(sentence); embeddings.push({ sentence, embedding }); } + + setTextEmbeddingTime(Date.now() - start); setSentencesWithEmbeddings(embeddings); } catch (error) { console.error('Error generating embeddings:', error); @@ -78,7 +88,8 @@ function ClipEmbeddingsScreen() { try { const start = Date.now(); const inputEmbedding = await textModel.forward(inputSentence); - setEmbeddingTime(Date.now() - start); + setTextEmbeddingTime(Date.now() - start); + const matches = 
sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ sentence, @@ -98,7 +109,8 @@ function ClipEmbeddingsScreen() { try { const start = Date.now(); const embedding = await textModel.forward(inputSentence); - setEmbeddingTime(Date.now() - start); + setTextEmbeddingTime(Date.now() - start); + setSentencesWithEmbeddings((prev) => [ ...prev, { sentence: inputSentence, embedding }, @@ -119,6 +131,7 @@ function ClipEmbeddingsScreen() { console.error('Error clearing the list:', error); } }; + const checkImage = async () => { if (!imageModel.isReady) return; @@ -132,7 +145,7 @@ function ClipEmbeddingsScreen() { const inputImageEmbedding = await imageModel.forward( output.assets[0].uri ); - setEmbeddingTime(Date.now() - start); + setImageEmbeddingTime(Date.now() - start); const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ @@ -270,11 +283,18 @@ function ClipEmbeddingsScreen() { - {embeddingTime !== null && ( + + {textEmbeddingTime !== null && ( + + Text Embedding time: {textEmbeddingTime} ms + + )} + {imageEmbeddingTime !== null && ( - Embedding time: {embeddingTime} ms + Image Embedding time: {imageEmbeddingTime} ms )} + {topMatches.length > 0 && ( Top Matches @@ -293,15 +313,8 @@ function ClipEmbeddingsScreen() { } const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: '#F8FAFC', - }, - scrollContainer: { - padding: 20, - alignItems: 'center', - flexGrow: 1, - }, + container: { flex: 1, backgroundColor: '#F8FAFC' }, + scrollContainer: { padding: 20, alignItems: 'center', flexGrow: 1 }, heading: { fontSize: 24, fontWeight: '500', @@ -323,11 +336,7 @@ const styles = StyleSheet.create({ marginBottom: 12, color: '#1E293B', }, - sentenceText: { - fontSize: 14, - marginBottom: 6, - color: '#334155', - }, + sentenceText: { fontSize: 14, marginBottom: 6, color: '#334155' }, input: { backgroundColor: '#F1F5F9', borderRadius: 10, @@ -338,10 +347,7 @@ const styles = StyleSheet.create({ minHeight: 40, textAlignVertical: 'top', 
}, - buttonContainer: { - width: '100%', - gap: 10, - }, + buttonContainer: { width: '100%', gap: 10 }, buttonGroup: { flexDirection: 'row', justifyContent: 'space-between', @@ -367,33 +373,16 @@ const styles = StyleSheet.create({ alignItems: 'center', justifyContent: 'center', }, - buttonDisabled: { - backgroundColor: '#f0f0f0', - borderColor: '#d3d3d3', - }, - buttonText: { - color: 'white', - textAlign: 'center', - fontWeight: '500', - }, - buttonTextOutline: { - color: 'navy', - textAlign: 'center', - fontWeight: '500', - }, - buttonTextDisabled: { - color: 'gray', - }, - topMatchesContainer: { - marginTop: 20, - }, + buttonDisabled: { backgroundColor: '#f0f0f0', borderColor: '#d3d3d3' }, + buttonText: { color: 'white', textAlign: 'center', fontWeight: '500' }, + buttonTextOutline: { color: 'navy', textAlign: 'center', fontWeight: '500' }, + buttonTextDisabled: { color: 'gray' }, + topMatchesContainer: { marginTop: 20 }, statsText: { fontSize: 13, color: '#64748B', marginTop: 8, textAlign: 'center', }, - flexContainer: { - flex: 1, - }, + flexContainer: { flex: 1 }, }); From 55c912f3606749b7f4901da58bf4c707af0da1f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Sat, 21 Mar 2026 13:16:46 +0100 Subject: [PATCH 7/9] chore: reformat From f3dbcb54c6a514043cbe2c4458fdee8f7f150b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Mon, 23 Mar 2026 12:25:49 +0100 Subject: [PATCH 8/9] chore: add suggestions from code review --- apps/computer-vision/app/classification/index.tsx | 1 + apps/computer-vision/app/object_detection/index.tsx | 1 + apps/computer-vision/app/ocr/index.tsx | 1 + apps/computer-vision/app/ocr_vertical/index.tsx | 1 + .../app/semantic_segmentation/index.tsx | 1 + apps/computer-vision/app/style_transfer/index.tsx | 1 + apps/llm/app/llm/index.tsx | 7 ++++++- apps/llm/app/llm_structured_output/index.tsx | 7 ++++++- apps/llm/app/llm_tool_calling/index.tsx | 7 ++++++- apps/llm/app/multimodal_llm/index.tsx | 
7 ++++++- apps/llm/components/StatsBar.tsx | 2 +- apps/llm/hooks/useLLMStats.ts | 11 +++++++---- .../common/rnexecutorch/models/llm/LLM.cpp | 8 ++++---- .../common/rnexecutorch/models/llm/LLM.h | 4 ++-- 14 files changed, 44 insertions(+), 15 deletions(-) diff --git a/apps/computer-vision/app/classification/index.tsx b/apps/computer-vision/app/classification/index.tsx index c22e2fcd4b..79469b156c 100644 --- a/apps/computer-vision/app/classification/index.tsx +++ b/apps/computer-vision/app/classification/index.tsx @@ -30,6 +30,7 @@ export default function ClassificationScreen() { if (typeof uri === 'string') { setImageUri(uri as string); setResults([]); + setInferenceTime(null); } }; diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 361c38c6e8..a0600d9276 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -48,6 +48,7 @@ export default function ObjectDetectionScreen() { setImageUri(image.uri as string); setImageDimensions({ width: width as number, height: height as number }); setResults([]); + setInferenceTime(null); } }; diff --git a/apps/computer-vision/app/ocr/index.tsx b/apps/computer-vision/app/ocr/index.tsx index fcfa5f4e2b..7033061a3b 100644 --- a/apps/computer-vision/app/ocr/index.tsx +++ b/apps/computer-vision/app/ocr/index.tsx @@ -60,6 +60,7 @@ export default function OCRScreen() { if (typeof uri === 'string') { setImageUri(uri as string); setResults([]); + setInferenceTime(null); } }; diff --git a/apps/computer-vision/app/ocr_vertical/index.tsx b/apps/computer-vision/app/ocr_vertical/index.tsx index bc6b7e57cf..b42a9055f0 100644 --- a/apps/computer-vision/app/ocr_vertical/index.tsx +++ b/apps/computer-vision/app/ocr_vertical/index.tsx @@ -35,6 +35,7 @@ export default function VerticalOCRScree() { if (typeof uri === 'string') { setImageUri(uri as string); setResults([]); + setInferenceTime(null); } }; diff 
--git a/apps/computer-vision/app/semantic_segmentation/index.tsx b/apps/computer-vision/app/semantic_segmentation/index.tsx index f40ec650c2..e8061b0597 100644 --- a/apps/computer-vision/app/semantic_segmentation/index.tsx +++ b/apps/computer-vision/app/semantic_segmentation/index.tsx @@ -88,6 +88,7 @@ export default function SemanticSegmentationScreen() { setImageUri(image.uri); setImageSize({ width: image.width ?? 0, height: image.height ?? 0 }); setSegImage(null); + setInferenceTime(null); }; const runForward = async () => { diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index e7ae7e9b17..08578375b8 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -51,6 +51,7 @@ export default function StyleTransferScreen() { if (typeof uri === 'string') { setImageUri(uri); setStyledUri(''); + setInferenceTime(null); } }; diff --git a/apps/llm/app/llm/index.tsx b/apps/llm/app/llm/index.tsx index a5e7806f12..9ef743cd43 100644 --- a/apps/llm/app/llm/index.tsx +++ b/apps/llm/app/llm/index.tsx @@ -46,7 +46,12 @@ function LLMScreen() { const { setGlobalGenerating } = useContext(GeneratingContext); const llm = useLLM({ model: selectedModel }); - const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating); + const tokenCount = llm.isReady ? 
llm.getGeneratedTokenCount() : 0; + const { stats, onMessageSend } = useLLMStats( + llm.response, + llm.isGenerating, + tokenCount + ); useEffect(() => { if (llm.error) { diff --git a/apps/llm/app/llm_structured_output/index.tsx b/apps/llm/app/llm_structured_output/index.tsx index 845504437b..18dea4af83 100644 --- a/apps/llm/app/llm_structured_output/index.tsx +++ b/apps/llm/app/llm_structured_output/index.tsx @@ -88,7 +88,12 @@ function LLMScreen() { const { setGlobalGenerating } = useContext(GeneratingContext); const llm = useLLM({ model: selectedModel }); - const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating); + const tokenCount = llm.isReady ? llm.getGeneratedTokenCount() : 0; + const { stats, onMessageSend } = useLLMStats( + llm.response, + llm.isGenerating, + tokenCount + ); useEffect(() => { setGlobalGenerating(llm.isGenerating); diff --git a/apps/llm/app/llm_tool_calling/index.tsx b/apps/llm/app/llm_tool_calling/index.tsx index c3f19e10f8..4ee4b913cf 100644 --- a/apps/llm/app/llm_tool_calling/index.tsx +++ b/apps/llm/app/llm_tool_calling/index.tsx @@ -60,7 +60,12 @@ function LLMToolCallingScreen() { const { setGlobalGenerating } = useContext(GeneratingContext); const llm = useLLM({ model: selectedModel }); - const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating); + const tokenCount = llm.isReady ? 
llm.getGeneratedTokenCount() : 0; + const { stats, onMessageSend } = useLLMStats( + llm.response, + llm.isGenerating, + tokenCount + ); useEffect(() => { setGlobalGenerating(llm.isGenerating); diff --git a/apps/llm/app/multimodal_llm/index.tsx b/apps/llm/app/multimodal_llm/index.tsx index ad9f7c380a..0cff3a74cc 100644 --- a/apps/llm/app/multimodal_llm/index.tsx +++ b/apps/llm/app/multimodal_llm/index.tsx @@ -46,7 +46,12 @@ function MultimodalLLMScreen() { const vlm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED, }); - const { stats, onMessageSend } = useLLMStats(vlm.response, vlm.isGenerating); + const tokenCount = vlm.isReady ? vlm.getGeneratedTokenCount() : 0; + const { stats, onMessageSend } = useLLMStats( + vlm.response, + vlm.isGenerating, + tokenCount + ); useEffect(() => { setGlobalGenerating(vlm.isGenerating); diff --git a/apps/llm/components/StatsBar.tsx b/apps/llm/components/StatsBar.tsx index 7ab2e69198..20bad6cfa1 100644 --- a/apps/llm/components/StatsBar.tsx +++ b/apps/llm/components/StatsBar.tsx @@ -15,7 +15,7 @@ export function StatsBar({ stats }: Props) { · {stats.tokensPerSec} tok/s · - ~{stats.totalTokens} tokens + {stats.totalTokens} tokens ); } diff --git a/apps/llm/hooks/useLLMStats.ts b/apps/llm/hooks/useLLMStats.ts index b798947199..4cb6e7abdb 100644 --- a/apps/llm/hooks/useLLMStats.ts +++ b/apps/llm/hooks/useLLMStats.ts @@ -6,7 +6,11 @@ export interface LLMStats { totalTokens: number; } -export function useLLMStats(response: string, isGenerating: boolean) { +export function useLLMStats( + response: string, + isGenerating: boolean, + totalTokens: number +) { const sendTimeRef = useRef(null); const firstTokenTimeRef = useRef(null); const lastResponseRef = useRef(''); @@ -19,7 +23,7 @@ export function useLLMStats(response: string, isGenerating: boolean) { firstTokenTimeRef.current = Date.now(); } } - }, [response, isGenerating]); + }, [response, isGenerating, totalTokens]); useEffect(() => { if ( @@ -30,14 +34,13 @@ export function 
useLLMStats(response: string, isGenerating: boolean) { const endTime = Date.now(); const ttft = firstTokenTimeRef.current - sendTimeRef.current; const totalTime = (endTime - firstTokenTimeRef.current) / 1000; - const totalTokens = Math.round(lastResponseRef.current.length / 4); const tokensPerSec = totalTime > 0 ? Math.round(totalTokens / totalTime) : 0; setStats({ ttft, tokensPerSec, totalTokens }); sendTimeRef.current = null; firstTokenTimeRef.current = null; } - }, [isGenerating]); + }, [isGenerating, totalTokens]); const onMessageSend = () => { sendTimeRef.current = Date.now(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp index 95ebed1d57..64e94c2ff0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp @@ -166,23 +166,23 @@ void LLM::reset() { runner_->reset(); } -size_t LLM::getGeneratedTokenCount() const noexcept { +int32_t LLM::getGeneratedTokenCount() const noexcept { if (!runner_ || !runner_->is_loaded()) return 0; return runner_->stats_.num_generated_tokens; } -size_t LLM::getPromptTokenCount() const noexcept { +int32_t LLM::getPromptTokenCount() const noexcept { if (!runner_ || !runner_->is_loaded()) return 0; - return runner_->stats_.num_prompt_tokens; + return static_cast<int32_t>(runner_->stats_.num_prompt_tokens); } int32_t LLM::getVisualTokenCount() const { if (!runner_ || !runner_->is_loaded()) { return 0; } - return runner_->get_visual_token_count(); + return static_cast<int32_t>(runner_->get_visual_token_count()); } int32_t LLM::countTextTokens(std::string text) const { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h index fcb93d0c18..5c9bc258d7 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h @@ -30,8 +30,8 @@ class LLM : public BaseModel { void interrupt(); void reset(); void unload() noexcept; - size_t getGeneratedTokenCount() const noexcept; - size_t getPromptTokenCount() const noexcept; + int32_t getGeneratedTokenCount() const noexcept; + int32_t getPromptTokenCount() const noexcept; int32_t countTextTokens(std::string text) const; int32_t getVisualTokenCount() const; size_t getMemoryLowerBound() const noexcept; From dab77d7c59f9ac1bc4e1d8ad397e7d04166b46d8 Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Mon, 23 Mar 2026 13:25:58 +0100 Subject: [PATCH 9/9] fix: disable model picker when generating --- apps/computer-vision/components/ModelPicker.tsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/computer-vision/components/ModelPicker.tsx b/apps/computer-vision/components/ModelPicker.tsx index dc76f5b171..991793768b 100644 --- a/apps/computer-vision/components/ModelPicker.tsx +++ b/apps/computer-vision/components/ModelPicker.tsx @@ -1,4 +1,4 @@ -import React, { useState } from 'react'; +import React, { useEffect, useState } from 'react'; import { View, StyleSheet, @@ -30,6 +30,10 @@ export function ModelPicker({ const [open, setOpen] = useState(false); const selected = models.find((m) => m.value === selectedModel); + useEffect(() => { + if (disabled) setOpen(false); + }, [disabled]); + return (