diff --git a/apps/computer-vision/app/classification/index.tsx b/apps/computer-vision/app/classification/index.tsx index 97dde1727d..79469b156c 100644 --- a/apps/computer-vision/app/classification/index.tsx +++ b/apps/computer-vision/app/classification/index.tsx @@ -9,12 +9,14 @@ import { BottomBar } from '../../components/BottomBar'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; +import { StatsBar } from '../../components/StatsBar'; export default function ClassificationScreen() { const [results, setResults] = useState<{ label: string; score: number }[]>( [] ); const [imageUri, setImageUri] = useState(''); + const [inferenceTime, setInferenceTime] = useState(null); const model = useClassification({ model: EFFICIENTNET_V2_S_QUANTIZED }); const { setGlobalGenerating } = useContext(GeneratingContext); @@ -28,13 +30,16 @@ export default function ClassificationScreen() { if (typeof uri === 'string') { setImageUri(uri as string); setResults([]); + setInferenceTime(null); } }; const runForward = async () => { if (imageUri) { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); const top10 = Object.entries(output) .sort(([, a], [, b]) => (b as number) - (a as number)) .slice(0, 10) @@ -80,6 +85,7 @@ export default function ClassificationScreen() { )} + [] = [ + { label: 'Yolo26N', value: YOLO26N_SEG }, + { label: 'Yolo26S', value: YOLO26S_SEG }, + { label: 'Yolo26M', value: YOLO26M_SEG }, + { label: 'Yolo26L', value: YOLO26L_SEG }, + { label: 'Yolo26X', value: YOLO26X_SEG }, + { label: 'RF-DeTR Nano', value: RF_DETR_NANO_SEG }, +]; export default function InstanceSegmentationScreen() { + const [selectedModel, setSelectedModel] = + useState(YOLO26N_SEG); + const [inferenceTime, setInferenceTime] = useState(null); + const { setGlobalGenerating } = useContext(GeneratingContext); const { @@ -28,7 +52,7 @@ export default function InstanceSegmentationScreen() { error, getAvailableInputSizes, } = useInstanceSegmentation({ - model: YOLO26N_SEG, + model: selectedModel, }); const [imageUri, setImageUri] = useState(''); @@ -60,12 +84,14 @@ export default function InstanceSegmentationScreen() { height: image.height ?? 0, }); setInstances([]); + setInferenceTime(null); }; const runForward = async () => { if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return; try { + const start = Date.now(); const output = await forward(imageUri, { confidenceThreshold: 0.5, iouThreshold: 0.55, @@ -74,6 +100,8 @@ export default function InstanceSegmentationScreen() { inputSize: selectedInputSize ?? undefined, }); + setInferenceTime(Date.now() - start); + // Convert raw masks → small Skia images immediately. // Raw Uint8Array mask buffers (backed by native OwningArrayBuffer) // go out of scope here and become eligible for GC right away. @@ -168,6 +196,22 @@ export default function InstanceSegmentationScreen() { )} + { + setSelectedModel(m); + setInstances([]); + setInferenceTime(null); + }} + /> + + 0 ? instances.length : null} + /> + [] = [ { label: 'RF-DeTR Nano', value: RF_DETR_NANO }, @@ -29,6 +30,7 @@ export default function ObjectDetectionScreen() { }>(); const [selectedModel, setSelectedModel] = useState(RF_DETR_NANO); + const [inferenceTime, setInferenceTime] = useState(null); const model = useObjectDetection({ model: selectedModel }); const { setGlobalGenerating } = useContext(GeneratingContext); @@ -46,13 +48,16 @@ export default function ObjectDetectionScreen() { setImageUri(image.uri as string); setImageDimensions({ width: width as number, height: height as number }); setResults([]); + setInferenceTime(null); } }; const runForward = async () => { if (imageUri) { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); setResults(output); } catch (e) { console.error(e); @@ -100,6 +105,10 @@ export default function ObjectDetectionScreen() { setResults([]); }} /> + 0 ? results.length : null} + /> (); const [selectedModel, setSelectedModel] = useState(OCR_ENGLISH); + const [inferenceTime, setInferenceTime] = useState(null); const model = useOCR({ model: selectedModel, @@ -58,12 +60,15 @@ export default function OCRScreen() { if (typeof uri === 'string') { setImageUri(uri as string); setResults([]); + setInferenceTime(null); } }; const runForward = async () => { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); setResults(output); } catch (e) { console.error(e); @@ -123,6 +128,10 @@ export default function OCRScreen() { setResults([]); }} /> + 0 ? results.length : null} + /> (); + const [inferenceTime, setInferenceTime] = useState(null); const model = useVerticalOCR({ model: OCR_ENGLISH, independentCharacters: true, @@ -33,12 +35,15 @@ export default function VerticalOCRScree() { if (typeof uri === 'string') { setImageUri(uri as string); setResults([]); + setInferenceTime(null); } }; const runForward = async () => { try { + const start = Date.now(); const output = await model.forward(imageUri); + setInferenceTime(Date.now() - start); setResults(output); } catch (e) { console.error(e); @@ -89,6 +94,10 @@ export default function VerticalOCRScree() { )} + 0 ? results.length : null} + /> (null); const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 }); + const [inferenceTime, setInferenceTime] = useState(null); useEffect(() => { setGlobalGenerating(isGenerating); @@ -86,11 +88,13 @@ export default function SemanticSegmentationScreen() { setImageUri(image.uri); setImageSize({ width: image.width ?? 0, height: image.height ?? 0 }); setSegImage(null); + setInferenceTime(null); }; const runForward = async () => { if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return; try { + const start = Date.now(); const { width, height } = imageSize; const output = await forward(imageUri, [], true); const argmax = output.ARGMAX || []; @@ -119,6 +123,7 @@ export default function SemanticSegmentationScreen() { width * 4 ); setSegImage(img); + setInferenceTime(Date.now() - start); } catch (e) { console.error(e); } @@ -179,6 +184,7 @@ export default function SemanticSegmentationScreen() { setSegImage(null); }} /> + (null); const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); @@ -49,13 +51,16 @@ export default function StyleTransferScreen() { if (typeof uri === 'string') { setImageUri(uri); setStyledUri(''); + setInferenceTime(null); } }; const runForward = async () => { if (imageUri) { try { + const start = Date.now(); const uri = await model.forward(imageUri, 'url'); + setInferenceTime(Date.now() - start); setStyledUri(uri); } catch (e) { console.error(e); @@ -96,6 +101,7 @@ export default function StyleTransferScreen() { setStyledUri(''); }} /> + ( BK_SDM_TINY_VPRED_256 ); + const [generationTime, setGenerationTime] = useState(null); const imageSize = 224; const model = useTextToImage({ @@ -55,8 +57,13 @@ export default function TextToImageScreen() { const runForward = async () => { if (!input.trim()) return; try { + const start = Date.now(); const output = await model.generate(input, imageSize, steps); - if (output.length) setImage(output); + + if (output.length) { + setImage(output); + setGenerationTime(Date.now() - start); + } } catch (e) { console.error(e); } finally { @@ -105,6 +112,7 @@ export default function TextToImageScreen() { onSelect={(m) => { setSelectedModel(m); setImage(null); + setGenerationTime(null); }} /> @@ -124,6 +132,8 @@ export default function TextToImageScreen() { + + ({ const [open, setOpen] = useState(false); const selected = models.find((m) => m.value === selectedModel); + useEffect(() => { + if (disabled) setOpen(false); + }, [disabled]); + return ( + Inference: {inferenceTime} ms + {detectionCount != null && ( + <> + · + + {detectionCount} detection{detectionCount !== 1 ? 's' : ''} + + + )} + + ); +} + +const styles = StyleSheet.create({ + container: { + flexDirection: 'row', + justifyContent: 'center', + alignItems: 'center', + gap: 8, + paddingVertical: 6, + }, + stat: { + fontSize: 13, + color: '#334155', + fontWeight: '500', + }, + separator: { + fontSize: 13, + color: '#94A3B8', + }, +}); diff --git a/apps/llm/app/llm/index.tsx b/apps/llm/app/llm/index.tsx index ab8b8e84db..9ef743cd43 100644 --- a/apps/llm/app/llm/index.tsx +++ b/apps/llm/app/llm/index.tsx @@ -28,6 +28,8 @@ const SUGGESTED_PROMPTS = [ 'What are the benefits of on-device AI?', 'Give me 3 fun facts about space', ]; +import { useLLMStats } from '../../hooks/useLLMStats'; +import { StatsBar } from '../../components/StatsBar'; export default function LLMScreenWrapper() { const isFocused = useIsFocused(); @@ -44,6 +46,12 @@ function LLMScreen() { const { setGlobalGenerating } = useContext(GeneratingContext); const llm = useLLM({ model: selectedModel }); + const tokenCount = llm.isReady ? llm.getGeneratedTokenCount() : 0; + const { stats, onMessageSend } = useLLMStats( + llm.response, + llm.isGenerating, + tokenCount + ); useEffect(() => { if (llm.error) { @@ -56,6 +64,7 @@ function LLMScreen() { }, [llm.isGenerating, setGlobalGenerating]); const sendMessage = async () => { + onMessageSend(); setUserInput(''); textInputRef.current?.clear(); try { @@ -109,7 +118,7 @@ function LLMScreen() { onSelect={(m) => setSelectedModel(m)} disabled={llm.isGenerating} /> - + (null); const { setGlobalGenerating } = useContext(GeneratingContext); - const llm = useLLM({ model: selectedModel }); // try out 4B model if 1.7B struggles with following structured output + const llm = useLLM({ model: selectedModel }); + const tokenCount = llm.isReady ? llm.getGeneratedTokenCount() : 0; + const { stats, onMessageSend } = useLLMStats( + llm.response, + llm.isGenerating, + tokenCount + ); useEffect(() => { setGlobalGenerating(llm.isGenerating); @@ -136,6 +144,7 @@ function LLMScreen() { }, [llm.error]); const sendMessage = async () => { + onMessageSend(); setUserInput(''); textInputRef.current?.clear(); try { @@ -190,7 +199,7 @@ function LLMScreen() { onSelect={(m) => setSelectedModel(m)} disabled={llm.isGenerating} /> - + { setGlobalGenerating(llm.isGenerating); @@ -158,6 +166,7 @@ function LLMToolCallingScreen() { }, []); const sendMessage = async () => { + onMessageSend(); setUserInput(''); textInputRef.current?.clear(); try { @@ -235,7 +244,7 @@ function LLMToolCallingScreen() { onSelect={(m) => setSelectedModel(m)} disabled={llm.isGenerating} /> - + { setGlobalGenerating(vlm.isGenerating); @@ -63,6 +71,7 @@ function MultimodalLLMScreen() { const sendMessage = async () => { if (!userInput.trim() || vlm.isGenerating) return; + onMessageSend(); const text = userInput.trim(); setUserInput(''); textInputRef.current?.clear(); @@ -138,6 +147,7 @@ function MultimodalLLMScreen() { )} + {/* Image picker button */} + TTFT: {stats.ttft} ms + · + {stats.tokensPerSec} tok/s + · + {stats.totalTokens} tokens + + ); +} + +const styles = StyleSheet.create({ + container: { + flexDirection: 'row', + justifyContent: 'center', + alignItems: 'center', + gap: 8, + paddingVertical: 6, + }, + stat: { + fontSize: 13, + color: '#334155', + fontWeight: '500', + }, + separator: { + fontSize: 13, + color: '#94A3B8', + }, +}); diff --git a/apps/llm/hooks/useLLMStats.ts b/apps/llm/hooks/useLLMStats.ts new file mode 100644 index 0000000000..4cb6e7abdb --- /dev/null +++ b/apps/llm/hooks/useLLMStats.ts @@ -0,0 +1,53 @@ +import { useEffect, useRef, useState } from 'react'; + +export interface LLMStats { + ttft: number; + tokensPerSec: number; + totalTokens: number; +} + +export function useLLMStats( + response: string, + isGenerating: boolean, + totalTokens: number +) { + const sendTimeRef = useRef(null); + const firstTokenTimeRef = useRef(null); + const lastResponseRef = useRef(''); + const [stats, setStats] = useState(null); + + useEffect(() => { + if (isGenerating && response.length > 0) { + lastResponseRef.current = response; + if (firstTokenTimeRef.current === null && sendTimeRef.current !== null) { + firstTokenTimeRef.current = Date.now(); + } + } + }, [response, isGenerating, totalTokens]); + + useEffect(() => { + if ( + !isGenerating && + sendTimeRef.current !== null && + firstTokenTimeRef.current !== null + ) { + const endTime = Date.now(); + const ttft = firstTokenTimeRef.current - sendTimeRef.current; + const totalTime = (endTime - firstTokenTimeRef.current) / 1000; + const tokensPerSec = + totalTime > 0 ? Math.round(totalTokens / totalTime) : 0; + setStats({ ttft, tokensPerSec, totalTokens }); + sendTimeRef.current = null; + firstTokenTimeRef.current = null; + } + }, [isGenerating, totalTokens]); + + const onMessageSend = () => { + sendTimeRef.current = Date.now(); + firstTokenTimeRef.current = null; + lastResponseRef.current = ''; + setStats(null); + }; + + return { stats, onMessageSend }; +} diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx index 354431b855..7fdf7f392f 100644 --- a/apps/speech/screens/SpeechToTextScreen.tsx +++ b/apps/speech/screens/SpeechToTextScreen.tsx @@ -54,6 +54,9 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { const [transcription, setTranscription] = useState(null); + const [transcriptionTime, setTranscriptionTime] = useState( + null + ); const [liveResult, setLiveResult] = useState<{ fullText: string; @@ -113,9 +116,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { try { const decodedAudioData = await audioContext.decodeAudioData(uri); const audioBuffer = decodedAudioData.getChannelData(0); + const start = Date.now(); const result = await model.transcribe(audioBuffer, { verbose: enableTimestamps, }); + setTranscriptionTime(Date.now() - start); setTranscription(result); } catch (error) { console.error('Error decoding audio data', error); @@ -252,6 +257,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { Status: {getModelStatus()} + {transcriptionTime !== null && ( + + Transcription: {transcriptionTime} ms + + )} ([]); + const [textEmbeddingTime, setTextEmbeddingTime] = useState( + null + ); + const [imageEmbeddingTime, setImageEmbeddingTime] = useState( + null + ); + useEffect( () => { const computeEmbeddings = async () => { @@ -54,11 +61,15 @@ function ClipEmbeddingsScreen() { ]; try { + const start = Date.now(); const embeddings = []; + for (const sentence of sentences) { const embedding = await textModel.forward(sentence); embeddings.push({ sentence, embedding }); } + + setTextEmbeddingTime(Date.now() - start); setSentencesWithEmbeddings(embeddings); } catch (error) { console.error('Error generating embeddings:', error); @@ -75,7 +86,10 @@ function ClipEmbeddingsScreen() { if (!textModel.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const inputEmbedding = await textModel.forward(inputSentence); + setTextEmbeddingTime(Date.now() - start); + const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ sentence, @@ -93,7 +107,10 @@ function ClipEmbeddingsScreen() { if (!textModel.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const embedding = await textModel.forward(inputSentence); + setTextEmbeddingTime(Date.now() - start); + setSentencesWithEmbeddings((prev) => [ ...prev, { sentence: inputSentence, embedding }, @@ -114,6 +131,7 @@ function ClipEmbeddingsScreen() { console.error('Error clearing the list:', error); } }; + const checkImage = async () => { if (!imageModel.isReady) return; @@ -123,9 +141,11 @@ function ClipEmbeddingsScreen() { return; try { + const start = Date.now(); const inputImageEmbedding = await imageModel.forward( output.assets[0].uri ); + setImageEmbeddingTime(Date.now() - start); const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ @@ -263,6 +283,18 @@ function ClipEmbeddingsScreen() { + + {textEmbeddingTime !== null && ( + + Text Embedding time: {textEmbeddingTime} ms + + )} + {imageEmbeddingTime !== null && ( + + Image Embedding time: {imageEmbeddingTime} ms + + )} + {topMatches.length > 0 && ( Top Matches @@ -281,15 +313,8 @@ function ClipEmbeddingsScreen() { } const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: '#F8FAFC', - }, - scrollContainer: { - padding: 20, - alignItems: 'center', - flexGrow: 1, - }, + container: { flex: 1, backgroundColor: '#F8FAFC' }, + scrollContainer: { padding: 20, alignItems: 'center', flexGrow: 1 }, heading: { fontSize: 24, fontWeight: '500', @@ -311,11 +336,7 @@ const styles = StyleSheet.create({ marginBottom: 12, color: '#1E293B', }, - sentenceText: { - fontSize: 14, - marginBottom: 6, - color: '#334155', - }, + sentenceText: { fontSize: 14, marginBottom: 6, color: '#334155' }, input: { backgroundColor: '#F1F5F9', borderRadius: 10, @@ -326,10 +347,7 @@ const styles = StyleSheet.create({ minHeight: 40, textAlignVertical: 'top', }, - buttonContainer: { - width: '100%', - gap: 10, - }, + buttonContainer: { width: '100%', gap: 10 }, buttonGroup: { flexDirection: 'row', justifyContent: 'space-between', @@ -355,27 +373,16 @@ const styles = StyleSheet.create({ alignItems: 'center', justifyContent: 'center', }, - buttonDisabled: { - backgroundColor: '#f0f0f0', - borderColor: '#d3d3d3', - }, - buttonText: { - color: 'white', + buttonDisabled: { backgroundColor: '#f0f0f0', borderColor: '#d3d3d3' }, + buttonText: { color: 'white', textAlign: 'center', fontWeight: '500' }, + buttonTextOutline: { color: 'navy', textAlign: 'center', fontWeight: '500' }, + buttonTextDisabled: { color: 'gray' }, + topMatchesContainer: { marginTop: 20 }, + statsText: { + fontSize: 13, + color: '#64748B', + marginTop: 8, textAlign: 'center', - fontWeight: '500', - }, - buttonTextOutline: { - color: 'navy', - textAlign: 'center', - fontWeight: '500', - }, - buttonTextDisabled: { - color: 'gray', - }, - topMatchesContainer: { - marginTop: 20, - }, - flexContainer: { - flex: 1, }, + flexContainer: { flex: 1 }, }); diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index 3e43004dde..40809a8e9b 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -31,6 +31,7 @@ function TextEmbeddingsScreen() { const [topMatches, setTopMatches] = useState< { sentence: string; similarity: number }[] >([]); + const [embeddingTime, setEmbeddingTime] = useState(null); useEffect( () => { @@ -66,7 +67,9 @@ function TextEmbeddingsScreen() { if (!model.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const inputEmbedding = await model.forward(inputSentence); + setEmbeddingTime(Date.now() - start); const matches = sentencesWithEmbeddings.map( ({ sentence, embedding }) => ({ sentence, @@ -84,7 +87,9 @@ function TextEmbeddingsScreen() { if (!model.isReady || !inputSentence.trim()) return; try { + const start = Date.now(); const embedding = await model.forward(inputSentence); + setEmbeddingTime(Date.now() - start); setSentencesWithEmbeddings((prev) => [ ...prev, { sentence: inputSentence, embedding }, @@ -217,6 +222,11 @@ function TextEmbeddingsScreen() { + {embeddingTime !== null && ( + + Embedding time: {embeddingTime} ms + + )} {topMatches.length > 0 && ( Top Matches @@ -329,6 +339,12 @@ const styles = StyleSheet.create({ topMatchesContainer: { marginTop: 20, }, + statsText: { + fontSize: 13, + color: '#64748B', + marginTop: 8, + textAlign: 'center', + }, flexContainer: { flex: 1, }, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp index 95ebed1d57..64e94c2ff0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp @@ -166,23 +166,23 @@ void LLM::reset() { runner_->reset(); } -size_t LLM::getGeneratedTokenCount() const noexcept { +int32_t LLM::getGeneratedTokenCount() const noexcept { if (!runner_ || !runner_->is_loaded()) return 0; return runner_->stats_.num_generated_tokens; } -size_t LLM::getPromptTokenCount() const noexcept { +int32_t LLM::getPromptTokenCount() const noexcept { if (!runner_ || !runner_->is_loaded()) return 0; - return runner_->stats_.num_prompt_tokens; + return static_cast(runner_->stats_.num_prompt_tokens); } int32_t LLM::getVisualTokenCount() const { if (!runner_ || !runner_->is_loaded()) { return 0; } - return runner_->get_visual_token_count(); + return static_cast(runner_->get_visual_token_count()); } int32_t LLM::countTextTokens(std::string text) const { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h index fcb93d0c18..5c9bc258d7 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h @@ -30,8 +30,8 @@ class LLM : public BaseModel { void interrupt(); void reset(); void unload() noexcept; - size_t getGeneratedTokenCount() const noexcept; - size_t getPromptTokenCount() const noexcept; + int32_t getGeneratedTokenCount() const noexcept; + int32_t getPromptTokenCount() const noexcept; int32_t countTextTokens(std::string text) const; int32_t getVisualTokenCount() const; size_t getMemoryLowerBound() const noexcept; diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts index b5e03ceb59..471dc2a2a1 100644 --- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts +++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts @@ -119,7 +119,12 @@ export const useTextToSpeech = ({ setIsGenerating(true); try { if (input.text) { - instance.streamInsert(input.text); + // If the initial text does not end with an end of sentence character, + // we add an artificial dot to improve output's quality. + instance.streamInsert( + input.text + + ('.?!;'.includes(input.text.trim().slice(-1)) ? '' : '.') + ); } await input.onBegin?.();