From 1b656cf8b0a86333c693d969614c0da90fa0273d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Thu, 19 Mar 2026 14:05:45 +0100
Subject: [PATCH 1/9] chore: Add basic statistics to demo apps

---
 .../app/classification/index.tsx              |  5 ++
 .../app/object_detection/index.tsx            |  8 +++
 apps/computer-vision/app/ocr/index.tsx        |  8 +++
 .../app/ocr_vertical/index.tsx                |  8 +++
 .../app/semantic_segmentation/index.tsx       |  5 ++
 .../app/style_transfer/index.tsx              |  5 ++
 .../app/text_to_image/index.tsx               | 26 ++++++++++
 apps/computer-vision/components/StatsBar.tsx  | 44 ++++++++++++++++
 apps/llm/app/llm/index.tsx                    |  6 ++-
 apps/llm/app/llm_structured_output/index.tsx  |  8 ++-
 apps/llm/app/llm_tool_calling/index.tsx       |  6 ++-
 apps/llm/app/multimodal_llm/index.tsx         |  5 ++
 apps/llm/components/StatsBar.tsx              | 40 +++++++++++++++
 apps/llm/hooks/useLLMStats.ts                 | 50 +++++++++++++++++++
 apps/speech/screens/SpeechToTextScreen.tsx    | 16 ++++++
 .../app/clip-embeddings/index.tsx             | 19 +++++++
 .../app/text-embeddings/index.tsx             | 16 ++++++
 17 files changed, 271 insertions(+), 4 deletions(-)
 create mode 100644 apps/computer-vision/components/StatsBar.tsx
 create mode 100644 apps/llm/components/StatsBar.tsx
 create mode 100644 apps/llm/hooks/useLLMStats.ts
diff --git a/apps/computer-vision/app/classification/index.tsx b/apps/computer-vision/app/classification/index.tsx
index 97dde1727d..c22e2fcd4b 100644
--- a/apps/computer-vision/app/classification/index.tsx
+++ b/apps/computer-vision/app/classification/index.tsx
@@ -9,12 +9,14 @@ import { BottomBar } from '../../components/BottomBar';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import { StatsBar } from '../../components/StatsBar';
 
 export default function ClassificationScreen() {
   const [results, setResults] = useState<{ label: string; score: number }[]>(
     []
   );
   const [imageUri, setImageUri] = useState('');
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
 
   const model = useClassification({ model: EFFICIENTNET_V2_S_QUANTIZED });
   const { setGlobalGenerating } = useContext(GeneratingContext);
@@ -34,7 +36,9 @@ export default function ClassificationScreen() {
   const runForward = async () => {
     if (imageUri) {
       try {
+        const start = Date.now();
         const output = await model.forward(imageUri);
+        setInferenceTime(Date.now() - start);
         const top10 = Object.entries(output)
           .sort(([, a], [, b]) => (b as number) - (a as number))
           .slice(0, 10)
@@ -80,6 +84,7 @@ export default function ClassificationScreen() {
           </View>
         )}
       </View>
+      <StatsBar inferenceTime={inferenceTime} />
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}
diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx
index c6ec9f1dc3..361c38c6e8 100644
--- a/apps/computer-vision/app/object_detection/index.tsx
+++ b/apps/computer-vision/app/object_detection/index.tsx
@@ -14,6 +14,7 @@ import ImageWithBboxes from '../../components/ImageWithBboxes';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import { StatsBar } from '../../components/StatsBar';
 
 const MODELS: ModelOption<ObjectDetectionModelSources>[] = [
   { label: 'RF-DeTR Nano', value: RF_DETR_NANO },
@@ -29,6 +30,7 @@ export default function ObjectDetectionScreen() {
   }>();
   const [selectedModel, setSelectedModel] =
     useState<ObjectDetectionModelSources>(RF_DETR_NANO);
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
 
   const model = useObjectDetection({ model: selectedModel });
   const { setGlobalGenerating } = useContext(GeneratingContext);
@@ -52,7 +54,9 @@ export default function ObjectDetectionScreen() {
   const runForward = async () => {
     if (imageUri) {
       try {
+        const start = Date.now();
         const output = await model.forward(imageUri);
+        setInferenceTime(Date.now() - start);
         setResults(output);
       } catch (e) {
         console.error(e);
@@ -100,6 +104,10 @@ export default function ObjectDetectionScreen() {
           setResults([]);
         }}
       />
+      <StatsBar
+        inferenceTime={inferenceTime}
+        detectionCount={results.length > 0 ? results.length : null}
+      />
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}
diff --git a/apps/computer-vision/app/ocr/index.tsx b/apps/computer-vision/app/ocr/index.tsx
index 1c0bf5ab4f..fcfa5f4e2b 100644
--- a/apps/computer-vision/app/ocr/index.tsx
+++ b/apps/computer-vision/app/ocr/index.tsx
@@ -18,6 +18,7 @@ import ImageWithBboxes2 from '../../components/ImageWithOCRBboxes';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import { StatsBar } from '../../components/StatsBar';
 
 type OCRModelSources = OCRProps['model'];
 
@@ -40,6 +41,7 @@ export default function OCRScreen() {
   }>();
   const [selectedModel, setSelectedModel] =
     useState<OCRModelSources>(OCR_ENGLISH);
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
 
   const model = useOCR({
     model: selectedModel,
@@ -63,7 +65,9 @@ export default function OCRScreen() {
 
   const runForward = async () => {
     try {
+      const start = Date.now();
       const output = await model.forward(imageUri);
+      setInferenceTime(Date.now() - start);
       setResults(output);
     } catch (e) {
       console.error(e);
@@ -123,6 +127,10 @@ export default function OCRScreen() {
           setResults([]);
         }}
       />
+      <StatsBar
+        inferenceTime={inferenceTime}
+        detectionCount={results.length > 0 ? results.length : null}
+      />
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}
diff --git a/apps/computer-vision/app/ocr_vertical/index.tsx b/apps/computer-vision/app/ocr_vertical/index.tsx
index f298a3d5c0..bc6b7e57cf 100644
--- a/apps/computer-vision/app/ocr_vertical/index.tsx
+++ b/apps/computer-vision/app/ocr_vertical/index.tsx
@@ -7,6 +7,7 @@ import ImageWithBboxes2 from '../../components/ImageWithOCRBboxes';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import { StatsBar } from '../../components/StatsBar';
 
 export default function VerticalOCRScree() {
   const [imageUri, setImageUri] = useState('');
@@ -15,6 +16,7 @@ export default function VerticalOCRScree() {
     width: number;
     height: number;
   }>();
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
   const model = useVerticalOCR({
     model: OCR_ENGLISH,
     independentCharacters: true,
@@ -38,7 +40,9 @@ export default function VerticalOCRScree() {
 
   const runForward = async () => {
     try {
+      const start = Date.now();
       const output = await model.forward(imageUri);
+      setInferenceTime(Date.now() - start);
       setResults(output);
     } catch (e) {
       console.error(e);
@@ -89,6 +93,10 @@ export default function VerticalOCRScree() {
           </View>
         )}
       </View>
+      <StatsBar
+        inferenceTime={inferenceTime}
+        detectionCount={results.length > 0 ? results.length : null}
+      />
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}
diff --git a/apps/computer-vision/app/semantic_segmentation/index.tsx b/apps/computer-vision/app/semantic_segmentation/index.tsx
index 433c9022f9..f40ec650c2 100644
--- a/apps/computer-vision/app/semantic_segmentation/index.tsx
+++ b/apps/computer-vision/app/semantic_segmentation/index.tsx
@@ -24,6 +24,7 @@ import { View, StyleSheet, Image } from 'react-native';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import { StatsBar } from '../../components/StatsBar';
 
 const numberToColor: number[][] = [
   [255, 87, 51], // 0 Red
@@ -75,6 +76,7 @@ export default function SemanticSegmentationScreen() {
   const [imageSize, setImageSize] = useState({ width: 0, height: 0 });
   const [segImage, setSegImage] = useState<SkImage | null>(null);
   const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 });
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
 
   useEffect(() => {
     setGlobalGenerating(isGenerating);
@@ -91,6 +93,7 @@ export default function SemanticSegmentationScreen() {
   const runForward = async () => {
     if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return;
     try {
+      const start = Date.now();
       const { width, height } = imageSize;
       const output = await forward(imageUri, [], true);
       const argmax = output.ARGMAX || [];
@@ -119,6 +122,7 @@ export default function SemanticSegmentationScreen() {
         width * 4
       );
       setSegImage(img);
+      setInferenceTime(Date.now() - start);
     } catch (e) {
       console.error(e);
     }
@@ -179,6 +183,7 @@ export default function SemanticSegmentationScreen() {
           setSegImage(null);
         }}
       />
+      <StatsBar inferenceTime={inferenceTime} />
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}
diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx
index 9cc6022ec7..e7ae7e9b17 100644
--- a/apps/computer-vision/app/style_transfer/index.tsx
+++ b/apps/computer-vision/app/style_transfer/index.tsx
@@ -16,6 +16,7 @@ import { View, StyleSheet, Image } from 'react-native';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import { StatsBar } from '../../components/StatsBar';
 
 type StyleTransferModelSources = {
   modelName: StyleTransferModelName;
@@ -42,6 +43,7 @@ export default function StyleTransferScreen() {
 
   const [imageUri, setImageUri] = useState('');
   const [styledUri, setStyledUri] = useState('');
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
 
   const handleCameraPress = async (isCamera: boolean) => {
     const image = await getImage(isCamera);
@@ -55,7 +57,9 @@ export default function StyleTransferScreen() {
   const runForward = async () => {
     if (imageUri) {
       try {
+        const start = Date.now();
         const uri = await model.forward(imageUri, 'url');
+        setInferenceTime(Date.now() - start);
         setStyledUri(uri);
       } catch (e) {
         console.error(e);
@@ -96,6 +100,7 @@ export default function StyleTransferScreen() {
           setStyledUri('');
         }}
       />
+      <StatsBar inferenceTime={inferenceTime} />
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}
diff --git a/apps/computer-vision/app/text_to_image/index.tsx b/apps/computer-vision/app/text_to_image/index.tsx
index f8c442cc98..34d575955b 100644
--- a/apps/computer-vision/app/text_to_image/index.tsx
+++ b/apps/computer-vision/app/text_to_image/index.tsx
@@ -30,6 +30,8 @@ const MODELS: ModelOption<TextToImageModelSources>[] = [
   { label: 'BK-SDM 256', value: BK_SDM_TINY_VPRED_256 },
   { label: 'BK-SDM 512', value: BK_SDM_TINY_VPRED_512 },
 ];
+import { BottomBarWithTextInput } from '../../components/BottomBarWithTextInput';
+import { StatsBar } from '../../components/StatsBar';
 
 export default function TextToImageScreen() {
   const [inferenceStepIdx, setInferenceStepIdx] = useState<number>(0);
@@ -39,6 +41,9 @@ export default function TextToImageScreen() {
   const [selectedModel, setSelectedModel] = useState<TextToImageModelSources>(
     BK_SDM_TINY_VPRED_256
   );
+  const [generationTime, setGenerationTime] = useState<number | null>(null);
+  const [showTextInput, setShowTextInput] = useState(false);
+  const [keyboardVisible, setKeyboardVisible] = useState(false);
 
   const imageSize = 224;
   const model = useTextToImage({
@@ -55,8 +60,15 @@ export default function TextToImageScreen() {
   const runForward = async () => {
     if (!input.trim()) return;
     try {
+      const start = Date.now();
       const output = await model.generate(input, imageSize, steps);
       if (output.length) setImage(output);
+      else {
+        setImageTitle(prevImageTitle);
+        return;
+      }
+      setGenerationTime(Date.now() - start);
+      setImage(output);
     } catch (e) {
       console.error(e);
     } finally {
@@ -134,6 +146,20 @@ export default function TextToImageScreen() {
             onSubmitEditing={runForward}
             returnKeyType="send"
           />
+        </View>
+        <StatsBar inferenceTime={generationTime} />
+        <View style={styles.bottomContainer}>
+          <BottomBarWithTextInput
+            runModel={runForward}
+            numSteps={steps}
+            setSteps={setSteps}
+            stopModel={model.interrupt}
+            isGenerating={model.isGenerating}
+            isReady={model.isReady}
+            showTextInput={showTextInput}
+            setShowTextInput={setShowTextInput}
+            keyboardVisible={keyboardVisible}
+          />
           {model.isGenerating ? (
             <TouchableOpacity
               style={styles.sendButton}
diff --git a/apps/computer-vision/components/StatsBar.tsx b/apps/computer-vision/components/StatsBar.tsx
new file mode 100644
index 0000000000..621ecd21d1
--- /dev/null
+++ b/apps/computer-vision/components/StatsBar.tsx
@@ -0,0 +1,44 @@
+import React from 'react';
+import { View, Text, StyleSheet } from 'react-native';
+
+interface Props {
+  inferenceTime: number | null; // elapsed time shown as "<n> ms"; null makes the bar render nothing
+  detectionCount?: number | null; // optional; when null or omitted the "detections" segment is left out
+}
+
+export function StatsBar({ inferenceTime, detectionCount }: Props) { // single-row stats bar: inference time plus an optional detection count
+  if (inferenceTime === null) return null; // no timing supplied: render nothing
+
+  return (
+    <View style={styles.container}>
+      <Text style={styles.stat}>Inference: {inferenceTime} ms</Text>
+      {detectionCount != null && (
+        <>
+          <Text style={styles.separator}>·</Text>
+          <Text style={styles.stat}>
+            {detectionCount} detection{detectionCount !== 1 ? 's' : ''}
+          </Text>
+        </>
+      )}
+    </View>
+  );
+}
+
+const styles = StyleSheet.create({ // styles for the stats row rendered by StatsBar
+  container: {
+    flexDirection: 'row', // stats and separators sit side by side
+    justifyContent: 'center',
+    alignItems: 'center',
+    gap: 8,
+    paddingVertical: 6,
+  },
+  stat: {
+    fontSize: 13,
+    color: '#334155',
+    fontWeight: '500',
+  },
+  separator: { // same font size as stat, lighter color
+    fontSize: 13,
+    color: '#94A3B8',
+  },
+});
diff --git a/apps/llm/app/llm/index.tsx b/apps/llm/app/llm/index.tsx
index ab8b8e84db..a5e7806f12 100644
--- a/apps/llm/app/llm/index.tsx
+++ b/apps/llm/app/llm/index.tsx
@@ -28,6 +28,8 @@ const SUGGESTED_PROMPTS = [
   'What are the benefits of on-device AI?',
   'Give me 3 fun facts about space',
 ];
+import { useLLMStats } from '../../hooks/useLLMStats';
+import { StatsBar } from '../../components/StatsBar';
 
 export default function LLMScreenWrapper() {
   const isFocused = useIsFocused();
@@ -44,6 +46,7 @@ function LLMScreen() {
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const llm = useLLM({ model: selectedModel });
+  const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating);
 
   useEffect(() => {
     if (llm.error) {
@@ -56,6 +59,7 @@ function LLMScreen() {
   }, [llm.isGenerating, setGlobalGenerating]);
 
   const sendMessage = async () => {
+    onMessageSend();
     setUserInput('');
     textInputRef.current?.clear();
     try {
@@ -109,7 +113,7 @@ function LLMScreen() {
             onSelect={(m) => setSelectedModel(m)}
             disabled={llm.isGenerating}
           />
-
+          <StatsBar stats={stats} />
           <View style={styles.bottomContainer}>
             <TextInput
               autoCorrect={false}
diff --git a/apps/llm/app/llm_structured_output/index.tsx b/apps/llm/app/llm_structured_output/index.tsx
index 316787fb71..845504437b 100644
--- a/apps/llm/app/llm_structured_output/index.tsx
+++ b/apps/llm/app/llm_structured_output/index.tsx
@@ -20,6 +20,8 @@ const SUGGESTED_PROMPTS = [
   "I'm Bob. Does it have warranty? I'll pay €50.",
   "Name's Sara. What condition? My bid is $75.",
 ];
+import { useLLMStats } from '../../hooks/useLLMStats';
+import { StatsBar } from '../../components/StatsBar';
 import {
   useLLM,
   fixAndValidateStructuredOutput,
@@ -85,7 +87,8 @@ function LLMScreen() {
   const textInputRef = useRef<TextInput>(null);
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
-  const llm = useLLM({ model: selectedModel }); // try out 4B model if 1.7B struggles with following structured output
+  const llm = useLLM({ model: selectedModel });
+  const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating);
 
   useEffect(() => {
     setGlobalGenerating(llm.isGenerating);
@@ -136,6 +139,7 @@ function LLMScreen() {
   }, [llm.error]);
 
   const sendMessage = async () => {
+    onMessageSend();
     setUserInput('');
     textInputRef.current?.clear();
     try {
@@ -190,7 +194,7 @@ function LLMScreen() {
             onSelect={(m) => setSelectedModel(m)}
             disabled={llm.isGenerating}
           />
-
+          <StatsBar stats={stats} />
           <View style={styles.bottomContainer}>
             <TextInput
               autoCorrect={false}
diff --git a/apps/llm/app/llm_tool_calling/index.tsx b/apps/llm/app/llm_tool_calling/index.tsx
index a65544081c..c3f19e10f8 100644
--- a/apps/llm/app/llm_tool_calling/index.tsx
+++ b/apps/llm/app/llm_tool_calling/index.tsx
@@ -39,6 +39,8 @@ const SUGGESTED_PROMPTS = [
   'Set screen brightness to 50%',
   'What do I have scheduled this week?',
 ];
+import { useLLMStats } from '../../hooks/useLLMStats';
+import { StatsBar } from '../../components/StatsBar';
 
 export default function LLMToolCallingScreenWrapper() {
   const isFocused = useIsFocused();
@@ -58,6 +60,7 @@ function LLMToolCallingScreen() {
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const llm = useLLM({ model: selectedModel });
+  const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating);
 
   useEffect(() => {
     setGlobalGenerating(llm.isGenerating);
@@ -158,6 +161,7 @@ function LLMToolCallingScreen() {
   }, []);
 
   const sendMessage = async () => {
+    onMessageSend();
     setUserInput('');
     textInputRef.current?.clear();
     try {
@@ -235,7 +239,7 @@ function LLMToolCallingScreen() {
             onSelect={(m) => setSelectedModel(m)}
             disabled={llm.isGenerating}
           />
-
+          <StatsBar stats={stats} />
           <View style={styles.bottomContainer}>
             <TextInput
               autoCorrect={false}
diff --git a/apps/llm/app/multimodal_llm/index.tsx b/apps/llm/app/multimodal_llm/index.tsx
index 5b7b8f735b..ad9f7c380a 100644
--- a/apps/llm/app/multimodal_llm/index.tsx
+++ b/apps/llm/app/multimodal_llm/index.tsx
@@ -28,6 +28,8 @@ const SUGGESTED_PROMPTS = [
   'What objects can you see?',
   'What text appears in this image?',
 ];
+import { useLLMStats } from '../../hooks/useLLMStats';
+import { StatsBar } from '../../components/StatsBar';
 
 export default function MultimodalLLMScreenWrapper() {
   const isFocused = useIsFocused();
@@ -44,6 +46,7 @@ function MultimodalLLMScreen() {
   const vlm = useLLM({
     model: LFM2_VL_1_6B_QUANTIZED,
   });
+  const { stats, onMessageSend } = useLLMStats(vlm.response, vlm.isGenerating);
 
   useEffect(() => {
     setGlobalGenerating(vlm.isGenerating);
@@ -63,6 +66,7 @@ function MultimodalLLMScreen() {
 
   const sendMessage = async () => {
     if (!userInput.trim() || vlm.isGenerating) return;
+    onMessageSend();
     const text = userInput.trim();
     setUserInput('');
     textInputRef.current?.clear();
@@ -138,6 +142,7 @@ function MultimodalLLMScreen() {
             </TouchableOpacity>
           )}
 
+          <StatsBar stats={stats} />
           <View style={styles.bottomContainer}>
             {/* Image picker button */}
             <TouchableOpacity
diff --git a/apps/llm/components/StatsBar.tsx b/apps/llm/components/StatsBar.tsx
new file mode 100644
index 0000000000..7ab2e69198
--- /dev/null
+++ b/apps/llm/components/StatsBar.tsx
@@ -0,0 +1,40 @@
+import React from 'react';
+import { View, Text, StyleSheet } from 'react-native';
+import { LLMStats } from '../hooks/useLLMStats';
+
+interface Props {
+  stats: LLMStats | null; // null makes the bar render nothing
+}
+
+export function StatsBar({ stats }: Props) { // single-row stats bar: TTFT, tokens per second and total tokens (shown with a "~" prefix)
+  if (!stats) return null; // no stats supplied: render nothing
+
+  return (
+    <View style={styles.container}>
+      <Text style={styles.stat}>TTFT: {stats.ttft} ms</Text>
+      <Text style={styles.separator}>·</Text>
+      <Text style={styles.stat}>{stats.tokensPerSec} tok/s</Text>
+      <Text style={styles.separator}>·</Text>
+      <Text style={styles.stat}>~{stats.totalTokens} tokens</Text>
+    </View>
+  );
+}
+
+const styles = StyleSheet.create({ // styles for the stats row rendered by StatsBar
+  container: {
+    flexDirection: 'row', // stats and separators sit side by side
+    justifyContent: 'center',
+    alignItems: 'center',
+    gap: 8,
+    paddingVertical: 6,
+  },
+  stat: {
+    fontSize: 13,
+    color: '#334155',
+    fontWeight: '500',
+  },
+  separator: { // same font size as stat, lighter color
+    fontSize: 13,
+    color: '#94A3B8',
+  },
+});
diff --git a/apps/llm/hooks/useLLMStats.ts b/apps/llm/hooks/useLLMStats.ts
new file mode 100644
index 0000000000..b798947199
--- /dev/null
+++ b/apps/llm/hooks/useLLMStats.ts
@@ -0,0 +1,50 @@
+import { useEffect, useRef, useState } from 'react';
+
+export interface LLMStats {
+  ttft: number; // time to first token in ms: send time to first non-empty response seen
+  tokensPerSec: number; // rounded totalTokens / seconds elapsed since the first token (0 if no time elapsed)
+  totalTokens: number; // estimate only: response length / 4, rounded
+}
+
+export function useLLMStats(response: string, isGenerating: boolean) { // hook: derives TTFT / throughput stats for one reply; returns { stats, onMessageSend }
+  const sendTimeRef = useRef<number | null>(null); // Date.now() at onMessageSend(); null when no measurement is in flight
+  const firstTokenTimeRef = useRef<number | null>(null); // Date.now() when the first non-empty response was observed
+  const lastResponseRef = useRef<string>(''); // latest response text seen while generating
+  const [stats, setStats] = useState<LLMStats | null>(null); // null until a timed generation finishes
+
+  useEffect(() => { // while generating: remember the response and stamp the first-token time once
+    if (isGenerating && response.length > 0) { // NOTE(review): if `response` still holds the previous reply when isGenerating flips to true, TTFT is stamped early - confirm useLLM clears it first
+      lastResponseRef.current = response;
+      if (firstTokenTimeRef.current === null && sendTimeRef.current !== null) {
+        firstTokenTimeRef.current = Date.now();
+      }
+    }
+  }, [response, isGenerating]);
+
+  useEffect(() => { // once generation stops: publish stats for the timed message
+    if (
+      !isGenerating &&
+      sendTimeRef.current !== null &&
+      firstTokenTimeRef.current !== null
+    ) {
+      const endTime = Date.now();
+      const ttft = firstTokenTimeRef.current - sendTimeRef.current;
+      const totalTime = (endTime - firstTokenTimeRef.current) / 1000; // seconds elapsed after the first token
+      const totalTokens = Math.round(lastResponseRef.current.length / 4); // rough estimate, not a tokenizer count
+      const tokensPerSec =
+        totalTime > 0 ? Math.round(totalTokens / totalTime) : 0; // avoid dividing by zero
+      setStats({ ttft, tokensPerSec, totalTokens });
+      sendTimeRef.current = null; // clear timestamps so this branch is skipped until the next onMessageSend()
+      firstTokenTimeRef.current = null;
+    }
+  }, [isGenerating]); // refs are read when the effect runs; only isGenerating changes re-run it
+
+  const onMessageSend = () => { // starts the clock and clears the previous stats
+    sendTimeRef.current = Date.now();
+    firstTokenTimeRef.current = null;
+    lastResponseRef.current = '';
+    setStats(null);
+  };
+
+  return { stats, onMessageSend };
+}
diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx
index 354431b855..7fdf7f392f 100644
--- a/apps/speech/screens/SpeechToTextScreen.tsx
+++ b/apps/speech/screens/SpeechToTextScreen.tsx
@@ -54,6 +54,9 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
 
   const [transcription, setTranscription] =
     useState<TranscriptionResult | null>(null);
+  const [transcriptionTime, setTranscriptionTime] = useState<number | null>(
+    null
+  );
 
   const [liveResult, setLiveResult] = useState<{
     fullText: string;
@@ -113,9 +116,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
     try {
       const decodedAudioData = await audioContext.decodeAudioData(uri);
       const audioBuffer = decodedAudioData.getChannelData(0);
+      const start = Date.now();
       const result = await model.transcribe(audioBuffer, {
         verbose: enableTimestamps,
       });
+      setTranscriptionTime(Date.now() - start);
       setTranscription(result);
     } catch (error) {
       console.error('Error decoding audio data', error);
@@ -252,6 +257,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
 
           <View style={styles.statusContainer}>
             <Text>Status: {getModelStatus()}</Text>
+            {transcriptionTime !== null && (
+              <Text style={styles.statsText}>
+                Transcription: {transcriptionTime} ms
+              </Text>
+            )}
           </View>
 
           <ModelPicker
@@ -385,6 +395,12 @@ const styles = StyleSheet.create({
     marginTop: 12,
     alignItems: 'center',
   },
+  statsText: {
+    fontSize: 13,
+    color: '#334155',
+    fontWeight: '500',
+    marginTop: 4,
+  },
   toggleContainer: {
     flexDirection: 'row',
     alignItems: 'center',
diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index e1de0c06b8..a373d799d0 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -40,6 +40,7 @@ function ClipEmbeddingsScreen() {
   const [topMatches, setTopMatches] = useState<
     { sentence: string; similarity: number }[]
   >([]);
+  const [embeddingTime, setEmbeddingTime] = useState<number | null>(null);
 
   useEffect(
     () => {
@@ -75,7 +76,9 @@ function ClipEmbeddingsScreen() {
     if (!textModel.isReady || !inputSentence.trim()) return;
 
     try {
+      const start = Date.now();
       const inputEmbedding = await textModel.forward(inputSentence);
+      setEmbeddingTime(Date.now() - start);
       const matches = sentencesWithEmbeddings.map(
         ({ sentence, embedding }) => ({
           sentence,
@@ -93,7 +96,9 @@ function ClipEmbeddingsScreen() {
     if (!textModel.isReady || !inputSentence.trim()) return;
 
     try {
+      const start = Date.now();
       const embedding = await textModel.forward(inputSentence);
+      setEmbeddingTime(Date.now() - start);
       setSentencesWithEmbeddings((prev) => [
         ...prev,
         { sentence: inputSentence, embedding },
@@ -123,9 +128,12 @@ function ClipEmbeddingsScreen() {
       return;
 
     try {
+      const start = Date.now();
+      // Array.from to get numbers[]
       const inputImageEmbedding = await imageModel.forward(
         output.assets[0].uri
       );
+      setEmbeddingTime(Date.now() - start);
 
       const matches = sentencesWithEmbeddings.map(
         ({ sentence, embedding }) => ({
@@ -263,6 +271,11 @@ function ClipEmbeddingsScreen() {
                 </TouchableOpacity>
               </View>
             </View>
+            {embeddingTime !== null && (
+              <Text style={styles.statsText}>
+                Embedding time: {embeddingTime} ms
+              </Text>
+            )}
             {topMatches.length > 0 && (
               <View style={styles.topMatchesContainer}>
                 <Text style={styles.sectionTitle}>Top Matches</Text>
@@ -375,6 +388,12 @@ const styles = StyleSheet.create({
   topMatchesContainer: {
     marginTop: 20,
   },
+  statsText: {
+    fontSize: 13,
+    color: '#64748B',
+    marginTop: 8,
+    textAlign: 'center',
+  },
   flexContainer: {
     flex: 1,
   },
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index 3e43004dde..40809a8e9b 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -31,6 +31,7 @@ function TextEmbeddingsScreen() {
   const [topMatches, setTopMatches] = useState<
     { sentence: string; similarity: number }[]
   >([]);
+  const [embeddingTime, setEmbeddingTime] = useState<number | null>(null);
 
   useEffect(
     () => {
@@ -66,7 +67,9 @@ function TextEmbeddingsScreen() {
     if (!model.isReady || !inputSentence.trim()) return;
 
     try {
+      const start = Date.now();
       const inputEmbedding = await model.forward(inputSentence);
+      setEmbeddingTime(Date.now() - start);
       const matches = sentencesWithEmbeddings.map(
         ({ sentence, embedding }) => ({
           sentence,
@@ -84,7 +87,9 @@ function TextEmbeddingsScreen() {
     if (!model.isReady || !inputSentence.trim()) return;
 
     try {
+      const start = Date.now();
       const embedding = await model.forward(inputSentence);
+      setEmbeddingTime(Date.now() - start);
       setSentencesWithEmbeddings((prev) => [
         ...prev,
         { sentence: inputSentence, embedding },
@@ -217,6 +222,11 @@ function TextEmbeddingsScreen() {
                 </TouchableOpacity>
               </View>
             </View>
+            {embeddingTime !== null && (
+              <Text style={styles.statsText}>
+                Embedding time: {embeddingTime} ms
+              </Text>
+            )}
             {topMatches.length > 0 && (
               <View style={styles.topMatchesContainer}>
                 <Text style={styles.sectionTitle}>Top Matches</Text>
@@ -329,6 +339,12 @@ const styles = StyleSheet.create({
   topMatchesContainer: {
     marginTop: 20,
   },
+  statsText: {
+    fontSize: 13,
+    color: '#64748B',
+    marginTop: 8,
+    textAlign: 'center',
+  },
   flexContainer: {
     flex: 1,
   },

From cbabf515c5a2176f1e06cd7e74a1be7bbc6f652c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Fri, 20 Mar 2026 14:07:45 +0100
Subject: [PATCH 2/9] chore: handle rebased, mangled code of app in text to
 image

---
 .../app/text_to_image/index.tsx               | 33 +++++--------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/apps/computer-vision/app/text_to_image/index.tsx b/apps/computer-vision/app/text_to_image/index.tsx
index 34d575955b..b5e80bab63 100644
--- a/apps/computer-vision/app/text_to_image/index.tsx
+++ b/apps/computer-vision/app/text_to_image/index.tsx
@@ -23,6 +23,7 @@ import { GeneratingContext } from '../../context';
 import ColorPalette from '../../colors';
 import ProgressBar from '../../components/ProgressBar';
 import { Ionicons } from '@expo/vector-icons';
+import { StatsBar } from '../../components/StatsBar';
 
 type TextToImageModelSources = TextToImageProps['model'];
 
@@ -30,8 +31,6 @@ const MODELS: ModelOption<TextToImageModelSources>[] = [
   { label: 'BK-SDM 256', value: BK_SDM_TINY_VPRED_256 },
   { label: 'BK-SDM 512', value: BK_SDM_TINY_VPRED_512 },
 ];
-import { BottomBarWithTextInput } from '../../components/BottomBarWithTextInput';
-import { StatsBar } from '../../components/StatsBar';
 
 export default function TextToImageScreen() {
   const [inferenceStepIdx, setInferenceStepIdx] = useState<number>(0);
@@ -42,8 +41,6 @@ export default function TextToImageScreen() {
     BK_SDM_TINY_VPRED_256
   );
   const [generationTime, setGenerationTime] = useState<number | null>(null);
-  const [showTextInput, setShowTextInput] = useState(false);
-  const [keyboardVisible, setKeyboardVisible] = useState(false);
 
   const imageSize = 224;
   const model = useTextToImage({
@@ -62,13 +59,11 @@ export default function TextToImageScreen() {
     try {
       const start = Date.now();
       const output = await model.generate(input, imageSize, steps);
-      if (output.length) setImage(output);
-      else {
-        setImageTitle(prevImageTitle);
-        return;
+
+      if (output.length) {
+        setImage(output);
+        setGenerationTime(Date.now() - start);
       }
-      setGenerationTime(Date.now() - start);
-      setImage(output);
     } catch (e) {
       console.error(e);
     } finally {
@@ -117,6 +112,7 @@ export default function TextToImageScreen() {
           onSelect={(m) => {
             setSelectedModel(m);
             setImage(null);
+            setGenerationTime(null);
           }}
         />
 
@@ -136,6 +132,9 @@ export default function TextToImageScreen() {
           </TouchableOpacity>
         </View>
 
+        {/* Added StatsBar here, just above the input row */}
+        <StatsBar inferenceTime={generationTime} />
+
         <View style={styles.inputRow}>
           <TextInput
             style={styles.textInput}
@@ -146,20 +145,6 @@ export default function TextToImageScreen() {
             onSubmitEditing={runForward}
             returnKeyType="send"
           />
-        </View>
-        <StatsBar inferenceTime={generationTime} />
-        <View style={styles.bottomContainer}>
-          <BottomBarWithTextInput
-            runModel={runForward}
-            numSteps={steps}
-            setSteps={setSteps}
-            stopModel={model.interrupt}
-            isGenerating={model.isGenerating}
-            isReady={model.isReady}
-            showTextInput={showTextInput}
-            setShowTextInput={setShowTextInput}
-            keyboardVisible={keyboardVisible}
-          />
           {model.isGenerating ? (
             <TouchableOpacity
               style={styles.sendButton}

From a6d5440f0589d79e678d63174803515f74326cfa Mon Sep 17 00:00:00 2001
From: IgorSwat <igorswat2002@o2.pl>
Date: Fri, 20 Mar 2026 18:18:15 +0100
Subject: [PATCH 3/9] Fix T2S streaming on unfinished sentences

---
 .../hooks/natural_language_processing/useTextToSpeech.ts   | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
index b5e03ceb59..471dc2a2a1 100644
--- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
+++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
@@ -119,7 +119,12 @@ export const useTextToSpeech = ({
       setIsGenerating(true);
       try {
         if (input.text) {
-          instance.streamInsert(input.text);
+          // If the initial text does not end with an end-of-sentence character,
+          // we append an artificial dot to improve the quality of the output.
+          instance.streamInsert(
+            input.text +
+              ('.?!;'.includes(input.text.trim().slice(-1)) ? '' : '.')
+          );
         }
 
         await input.onBegin?.();

From 7bc532e3a28078cefdd61d02358837ef5647bdd1 Mon Sep 17 00:00:00 2001
From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com>
Date: Sat, 21 Mar 2026 12:24:04 +0100
Subject: [PATCH 4/9] Apply suggestions from code review

Co-authored-by: Bartosz Hanc <bartosz.hanc02@gmail.com>
---
 apps/computer-vision/app/text_to_image/index.tsx   | 1 -
 apps/text-embeddings/app/clip-embeddings/index.tsx | 1 -
 2 files changed, 2 deletions(-)

diff --git a/apps/computer-vision/app/text_to_image/index.tsx b/apps/computer-vision/app/text_to_image/index.tsx
index b5e80bab63..9b007be998 100644
--- a/apps/computer-vision/app/text_to_image/index.tsx
+++ b/apps/computer-vision/app/text_to_image/index.tsx
@@ -132,7 +132,6 @@ export default function TextToImageScreen() {
           </TouchableOpacity>
         </View>
 
-        {/* Added StatsBar here, just above the input row */}
         <StatsBar inferenceTime={generationTime} />
 
         <View style={styles.inputRow}>
diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index a373d799d0..b97d18ca87 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -129,7 +129,6 @@ function ClipEmbeddingsScreen() {
 
     try {
       const start = Date.now();
-      // Array.from to get numbers[]
       const inputImageEmbedding = await imageModel.forward(
         output.assets[0].uri
       );

From 64d4f441765f1ed05c65d6f2f0fc8f6425391349 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Sat, 21 Mar 2026 12:52:41 +0100
Subject: [PATCH 5/9] chore: add model selection and basic statistics to
 instance segmentation

---
 .../app/instance_segmentation/index.tsx       | 48 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/apps/computer-vision/app/instance_segmentation/index.tsx b/apps/computer-vision/app/instance_segmentation/index.tsx
index e49594eb0a..f833ffa24e 100644
--- a/apps/computer-vision/app/instance_segmentation/index.tsx
+++ b/apps/computer-vision/app/instance_segmentation/index.tsx
@@ -1,7 +1,17 @@
 import Spinner from '../../components/Spinner';
 import { BottomBar } from '../../components/BottomBar';
 import { getImage } from '../../utils';
-import { useInstanceSegmentation, YOLO26N_SEG } from 'react-native-executorch';
+import { ModelPicker, ModelOption } from '../../components/ModelPicker';
+import {
+  useInstanceSegmentation,
+  YOLO26N_SEG,
+  YOLO26S_SEG,
+  YOLO26M_SEG,
+  YOLO26L_SEG,
+  YOLO26X_SEG,
+  RF_DETR_NANO_SEG,
+  InstanceSegmentationModelSources,
+} from 'react-native-executorch';
 import {
   View,
   StyleSheet,
@@ -16,8 +26,22 @@ import ImageWithMasks, {
   buildDisplayInstances,
   DisplayInstance,
 } from '../../components/ImageWithMasks';
+import { StatsBar } from '../../components/StatsBar';
+
+const MODELS: ModelOption<InstanceSegmentationModelSources>[] = [
+  { label: 'Yolo26N', value: YOLO26N_SEG },
+  { label: 'Yolo26S', value: YOLO26S_SEG },
+  { label: 'Yolo26M', value: YOLO26M_SEG },
+  { label: 'Yolo26L', value: YOLO26L_SEG },
+  { label: 'Yolo26X', value: YOLO26X_SEG },
+  { label: 'RF-DeTR Nano', value: RF_DETR_NANO_SEG },
+];
 
 export default function InstanceSegmentationScreen() {
+  const [selectedModel, setSelectedModel] =
+    useState<InstanceSegmentationModelSources>(YOLO26N_SEG);
+  const [inferenceTime, setInferenceTime] = useState<number | null>(null);
+
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const {
@@ -28,7 +52,7 @@ export default function InstanceSegmentationScreen() {
     error,
     getAvailableInputSizes,
   } = useInstanceSegmentation({
-    model: YOLO26N_SEG,
+    model: selectedModel,
   });
 
   const [imageUri, setImageUri] = useState('');
@@ -60,12 +84,14 @@ export default function InstanceSegmentationScreen() {
       height: image.height ?? 0,
     });
     setInstances([]);
+    setInferenceTime(null);
   };
 
   const runForward = async () => {
     if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return;
 
     try {
+      const start = Date.now();
       const output = await forward(imageUri, {
         confidenceThreshold: 0.5,
         iouThreshold: 0.55,
@@ -74,6 +100,8 @@ export default function InstanceSegmentationScreen() {
         inputSize: selectedInputSize ?? undefined,
       });
 
+      setInferenceTime(Date.now() - start);
+
       // Convert raw masks → small Skia images immediately.
       // Raw Uint8Array mask buffers (backed by native OwningArrayBuffer)
       // go out of scope here and become eligible for GC right away.
@@ -168,6 +196,22 @@ export default function InstanceSegmentationScreen() {
         )}
       </View>
 
+      <ModelPicker
+        models={MODELS}
+        selectedModel={selectedModel}
+        disabled={isGenerating}
+        onSelect={(m) => {
+          setSelectedModel(m);
+          setInstances([]);
+          setInferenceTime(null);
+        }}
+      />
+
+      <StatsBar
+        inferenceTime={inferenceTime}
+        detectionCount={instances.length > 0 ? instances.length : null}
+      />
+
       <BottomBar
         handleCameraPress={handleCameraPress}
         runForward={runForward}

From 95ed309e3abe4772bec6d70a82a6ea5ce5b69e69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Sat, 21 Mar 2026 13:13:08 +0100
Subject: [PATCH 6/9] chore: split inference time in image embeddings example

---
 .../app/clip-embeddings/index.tsx             | 83 ++++++++-----------
 1 file changed, 36 insertions(+), 47 deletions(-)

diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index b97d18ca87..e9831c6be8 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -40,7 +40,13 @@ function ClipEmbeddingsScreen() {
   const [topMatches, setTopMatches] = useState<
     { sentence: string; similarity: number }[]
   >([]);
-  const [embeddingTime, setEmbeddingTime] = useState<number | null>(null);
+
+  const [textEmbeddingTime, setTextEmbeddingTime] = useState<number | null>(
+    null
+  );
+  const [imageEmbeddingTime, setImageEmbeddingTime] = useState<number | null>(
+    null
+  );
 
   useEffect(
     () => {
@@ -55,11 +61,15 @@ function ClipEmbeddingsScreen() {
         ];
 
         try {
+          const start = Date.now();
           const embeddings = [];
+
           for (const sentence of sentences) {
             const embedding = await textModel.forward(sentence);
             embeddings.push({ sentence, embedding });
           }
+
+          setTextEmbeddingTime(Date.now() - start);
           setSentencesWithEmbeddings(embeddings);
         } catch (error) {
           console.error('Error generating embeddings:', error);
@@ -78,7 +88,8 @@ function ClipEmbeddingsScreen() {
     try {
       const start = Date.now();
       const inputEmbedding = await textModel.forward(inputSentence);
-      setEmbeddingTime(Date.now() - start);
+      setTextEmbeddingTime(Date.now() - start);
+
       const matches = sentencesWithEmbeddings.map(
         ({ sentence, embedding }) => ({
           sentence,
@@ -98,7 +109,8 @@ function ClipEmbeddingsScreen() {
     try {
       const start = Date.now();
       const embedding = await textModel.forward(inputSentence);
-      setEmbeddingTime(Date.now() - start);
+      setTextEmbeddingTime(Date.now() - start);
+
       setSentencesWithEmbeddings((prev) => [
         ...prev,
         { sentence: inputSentence, embedding },
@@ -119,6 +131,7 @@ function ClipEmbeddingsScreen() {
       console.error('Error clearing the list:', error);
     }
   };
+
   const checkImage = async () => {
     if (!imageModel.isReady) return;
 
@@ -132,7 +145,7 @@ function ClipEmbeddingsScreen() {
       const inputImageEmbedding = await imageModel.forward(
         output.assets[0].uri
       );
-      setEmbeddingTime(Date.now() - start);
+      setImageEmbeddingTime(Date.now() - start);
 
       const matches = sentencesWithEmbeddings.map(
         ({ sentence, embedding }) => ({
@@ -270,11 +283,18 @@ function ClipEmbeddingsScreen() {
                 </TouchableOpacity>
               </View>
             </View>
-            {embeddingTime !== null && (
+
+            {textEmbeddingTime !== null && (
+              <Text style={styles.statsText}>
+                Text Embedding time: {textEmbeddingTime} ms
+              </Text>
+            )}
+            {imageEmbeddingTime !== null && (
               <Text style={styles.statsText}>
-                Embedding time: {embeddingTime} ms
+                Image Embedding time: {imageEmbeddingTime} ms
               </Text>
             )}
+
             {topMatches.length > 0 && (
               <View style={styles.topMatchesContainer}>
                 <Text style={styles.sectionTitle}>Top Matches</Text>
@@ -293,15 +313,8 @@ function ClipEmbeddingsScreen() {
 }
 
 const styles = StyleSheet.create({
-  container: {
-    flex: 1,
-    backgroundColor: '#F8FAFC',
-  },
-  scrollContainer: {
-    padding: 20,
-    alignItems: 'center',
-    flexGrow: 1,
-  },
+  container: { flex: 1, backgroundColor: '#F8FAFC' },
+  scrollContainer: { padding: 20, alignItems: 'center', flexGrow: 1 },
   heading: {
     fontSize: 24,
     fontWeight: '500',
@@ -323,11 +336,7 @@ const styles = StyleSheet.create({
     marginBottom: 12,
     color: '#1E293B',
   },
-  sentenceText: {
-    fontSize: 14,
-    marginBottom: 6,
-    color: '#334155',
-  },
+  sentenceText: { fontSize: 14, marginBottom: 6, color: '#334155' },
   input: {
     backgroundColor: '#F1F5F9',
     borderRadius: 10,
@@ -338,10 +347,7 @@ const styles = StyleSheet.create({
     minHeight: 40,
     textAlignVertical: 'top',
   },
-  buttonContainer: {
-    width: '100%',
-    gap: 10,
-  },
+  buttonContainer: { width: '100%', gap: 10 },
   buttonGroup: {
     flexDirection: 'row',
     justifyContent: 'space-between',
@@ -367,33 +373,16 @@ const styles = StyleSheet.create({
     alignItems: 'center',
     justifyContent: 'center',
   },
-  buttonDisabled: {
-    backgroundColor: '#f0f0f0',
-    borderColor: '#d3d3d3',
-  },
-  buttonText: {
-    color: 'white',
-    textAlign: 'center',
-    fontWeight: '500',
-  },
-  buttonTextOutline: {
-    color: 'navy',
-    textAlign: 'center',
-    fontWeight: '500',
-  },
-  buttonTextDisabled: {
-    color: 'gray',
-  },
-  topMatchesContainer: {
-    marginTop: 20,
-  },
+  buttonDisabled: { backgroundColor: '#f0f0f0', borderColor: '#d3d3d3' },
+  buttonText: { color: 'white', textAlign: 'center', fontWeight: '500' },
+  buttonTextOutline: { color: 'navy', textAlign: 'center', fontWeight: '500' },
+  buttonTextDisabled: { color: 'gray' },
+  topMatchesContainer: { marginTop: 20 },
   statsText: {
     fontSize: 13,
     color: '#64748B',
     marginTop: 8,
     textAlign: 'center',
   },
-  flexContainer: {
-    flex: 1,
-  },
+  flexContainer: { flex: 1 },
 });

From 55c912f3606749b7f4901da58bf4c707af0da1f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Sat, 21 Mar 2026 13:16:46 +0100
Subject: [PATCH 7/9] chore: reformat


From f3dbcb54c6a514043cbe2c4458fdee8f7f150b88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Mon, 23 Mar 2026 12:25:49 +0100
Subject: [PATCH 8/9] chore: add suggestions from code review

---
 apps/computer-vision/app/classification/index.tsx     |  1 +
 apps/computer-vision/app/object_detection/index.tsx   |  1 +
 apps/computer-vision/app/ocr/index.tsx                |  1 +
 apps/computer-vision/app/ocr_vertical/index.tsx       |  1 +
 .../app/semantic_segmentation/index.tsx               |  1 +
 apps/computer-vision/app/style_transfer/index.tsx     |  1 +
 apps/llm/app/llm/index.tsx                            |  7 ++++++-
 apps/llm/app/llm_structured_output/index.tsx          |  7 ++++++-
 apps/llm/app/llm_tool_calling/index.tsx               |  7 ++++++-
 apps/llm/app/multimodal_llm/index.tsx                 |  7 ++++++-
 apps/llm/components/StatsBar.tsx                      |  2 +-
 apps/llm/hooks/useLLMStats.ts                         | 11 +++++++----
 .../common/rnexecutorch/models/llm/LLM.cpp            |  8 ++++----
 .../common/rnexecutorch/models/llm/LLM.h              |  4 ++--
 14 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/apps/computer-vision/app/classification/index.tsx b/apps/computer-vision/app/classification/index.tsx
index c22e2fcd4b..79469b156c 100644
--- a/apps/computer-vision/app/classification/index.tsx
+++ b/apps/computer-vision/app/classification/index.tsx
@@ -30,6 +30,7 @@ export default function ClassificationScreen() {
     if (typeof uri === 'string') {
       setImageUri(uri as string);
       setResults([]);
+      setInferenceTime(null);
     }
   };
 
diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx
index 361c38c6e8..a0600d9276 100644
--- a/apps/computer-vision/app/object_detection/index.tsx
+++ b/apps/computer-vision/app/object_detection/index.tsx
@@ -48,6 +48,7 @@ export default function ObjectDetectionScreen() {
       setImageUri(image.uri as string);
       setImageDimensions({ width: width as number, height: height as number });
       setResults([]);
+      setInferenceTime(null);
     }
   };
 
diff --git a/apps/computer-vision/app/ocr/index.tsx b/apps/computer-vision/app/ocr/index.tsx
index fcfa5f4e2b..7033061a3b 100644
--- a/apps/computer-vision/app/ocr/index.tsx
+++ b/apps/computer-vision/app/ocr/index.tsx
@@ -60,6 +60,7 @@ export default function OCRScreen() {
     if (typeof uri === 'string') {
       setImageUri(uri as string);
       setResults([]);
+      setInferenceTime(null);
     }
   };
 
diff --git a/apps/computer-vision/app/ocr_vertical/index.tsx b/apps/computer-vision/app/ocr_vertical/index.tsx
index bc6b7e57cf..b42a9055f0 100644
--- a/apps/computer-vision/app/ocr_vertical/index.tsx
+++ b/apps/computer-vision/app/ocr_vertical/index.tsx
@@ -35,6 +35,7 @@ export default function VerticalOCRScree() {
     if (typeof uri === 'string') {
       setImageUri(uri as string);
       setResults([]);
+      setInferenceTime(null);
     }
   };
 
diff --git a/apps/computer-vision/app/semantic_segmentation/index.tsx b/apps/computer-vision/app/semantic_segmentation/index.tsx
index f40ec650c2..e8061b0597 100644
--- a/apps/computer-vision/app/semantic_segmentation/index.tsx
+++ b/apps/computer-vision/app/semantic_segmentation/index.tsx
@@ -88,6 +88,7 @@ export default function SemanticSegmentationScreen() {
     setImageUri(image.uri);
     setImageSize({ width: image.width ?? 0, height: image.height ?? 0 });
     setSegImage(null);
+    setInferenceTime(null);
   };
 
   const runForward = async () => {
diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx
index e7ae7e9b17..08578375b8 100644
--- a/apps/computer-vision/app/style_transfer/index.tsx
+++ b/apps/computer-vision/app/style_transfer/index.tsx
@@ -51,6 +51,7 @@ export default function StyleTransferScreen() {
     if (typeof uri === 'string') {
       setImageUri(uri);
       setStyledUri('');
+      setInferenceTime(null);
     }
   };
 
diff --git a/apps/llm/app/llm/index.tsx b/apps/llm/app/llm/index.tsx
index a5e7806f12..9ef743cd43 100644
--- a/apps/llm/app/llm/index.tsx
+++ b/apps/llm/app/llm/index.tsx
@@ -46,7 +46,12 @@ function LLMScreen() {
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const llm = useLLM({ model: selectedModel });
-  const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating);
+  const tokenCount = llm.isReady ? llm.getGeneratedTokenCount() : 0;
+  const { stats, onMessageSend } = useLLMStats(
+    llm.response,
+    llm.isGenerating,
+    tokenCount
+  );
 
   useEffect(() => {
     if (llm.error) {
diff --git a/apps/llm/app/llm_structured_output/index.tsx b/apps/llm/app/llm_structured_output/index.tsx
index 845504437b..18dea4af83 100644
--- a/apps/llm/app/llm_structured_output/index.tsx
+++ b/apps/llm/app/llm_structured_output/index.tsx
@@ -88,7 +88,12 @@ function LLMScreen() {
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const llm = useLLM({ model: selectedModel });
-  const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating);
+  const tokenCount = llm.isReady ? llm.getGeneratedTokenCount() : 0;
+  const { stats, onMessageSend } = useLLMStats(
+    llm.response,
+    llm.isGenerating,
+    tokenCount
+  );
 
   useEffect(() => {
     setGlobalGenerating(llm.isGenerating);
diff --git a/apps/llm/app/llm_tool_calling/index.tsx b/apps/llm/app/llm_tool_calling/index.tsx
index c3f19e10f8..4ee4b913cf 100644
--- a/apps/llm/app/llm_tool_calling/index.tsx
+++ b/apps/llm/app/llm_tool_calling/index.tsx
@@ -60,7 +60,12 @@ function LLMToolCallingScreen() {
   const { setGlobalGenerating } = useContext(GeneratingContext);
 
   const llm = useLLM({ model: selectedModel });
-  const { stats, onMessageSend } = useLLMStats(llm.response, llm.isGenerating);
+  const tokenCount = llm.isReady ? llm.getGeneratedTokenCount() : 0;
+  const { stats, onMessageSend } = useLLMStats(
+    llm.response,
+    llm.isGenerating,
+    tokenCount
+  );
 
   useEffect(() => {
     setGlobalGenerating(llm.isGenerating);
diff --git a/apps/llm/app/multimodal_llm/index.tsx b/apps/llm/app/multimodal_llm/index.tsx
index ad9f7c380a..0cff3a74cc 100644
--- a/apps/llm/app/multimodal_llm/index.tsx
+++ b/apps/llm/app/multimodal_llm/index.tsx
@@ -46,7 +46,12 @@ function MultimodalLLMScreen() {
   const vlm = useLLM({
     model: LFM2_VL_1_6B_QUANTIZED,
   });
-  const { stats, onMessageSend } = useLLMStats(vlm.response, vlm.isGenerating);
+  const tokenCount = vlm.isReady ? vlm.getGeneratedTokenCount() : 0;
+  const { stats, onMessageSend } = useLLMStats(
+    vlm.response,
+    vlm.isGenerating,
+    tokenCount
+  );
 
   useEffect(() => {
     setGlobalGenerating(vlm.isGenerating);
diff --git a/apps/llm/components/StatsBar.tsx b/apps/llm/components/StatsBar.tsx
index 7ab2e69198..20bad6cfa1 100644
--- a/apps/llm/components/StatsBar.tsx
+++ b/apps/llm/components/StatsBar.tsx
@@ -15,7 +15,7 @@ export function StatsBar({ stats }: Props) {
       <Text style={styles.separator}>·</Text>
       <Text style={styles.stat}>{stats.tokensPerSec} tok/s</Text>
       <Text style={styles.separator}>·</Text>
-      <Text style={styles.stat}>~{stats.totalTokens} tokens</Text>
+      <Text style={styles.stat}>{stats.totalTokens} tokens</Text>
     </View>
   );
 }
diff --git a/apps/llm/hooks/useLLMStats.ts b/apps/llm/hooks/useLLMStats.ts
index b798947199..4cb6e7abdb 100644
--- a/apps/llm/hooks/useLLMStats.ts
+++ b/apps/llm/hooks/useLLMStats.ts
@@ -6,7 +6,11 @@ export interface LLMStats {
   totalTokens: number;
 }
 
-export function useLLMStats(response: string, isGenerating: boolean) {
+export function useLLMStats(
+  response: string,
+  isGenerating: boolean,
+  totalTokens: number
+) {
   const sendTimeRef = useRef<number | null>(null);
   const firstTokenTimeRef = useRef<number | null>(null);
   const lastResponseRef = useRef<string>('');
@@ -19,7 +23,7 @@ export function useLLMStats(response: string, isGenerating: boolean) {
         firstTokenTimeRef.current = Date.now();
       }
     }
-  }, [response, isGenerating]);
+  }, [response, isGenerating, totalTokens]);
 
   useEffect(() => {
     if (
@@ -30,14 +34,13 @@ export function useLLMStats(response: string, isGenerating: boolean) {
       const endTime = Date.now();
       const ttft = firstTokenTimeRef.current - sendTimeRef.current;
       const totalTime = (endTime - firstTokenTimeRef.current) / 1000;
-      const totalTokens = Math.round(lastResponseRef.current.length / 4);
       const tokensPerSec =
         totalTime > 0 ? Math.round(totalTokens / totalTime) : 0;
       setStats({ ttft, tokensPerSec, totalTokens });
       sendTimeRef.current = null;
       firstTokenTimeRef.current = null;
     }
-  }, [isGenerating]);
+  }, [isGenerating, totalTokens]);
 
   const onMessageSend = () => {
     sendTimeRef.current = Date.now();
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
index 95ebed1d57..64e94c2ff0 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
@@ -166,23 +166,23 @@ void LLM::reset() {
   runner_->reset();
 }
 
-size_t LLM::getGeneratedTokenCount() const noexcept {
+int32_t LLM::getGeneratedTokenCount() const noexcept {
   if (!runner_ || !runner_->is_loaded())
     return 0;
   return runner_->stats_.num_generated_tokens;
 }
 
-size_t LLM::getPromptTokenCount() const noexcept {
+int32_t LLM::getPromptTokenCount() const noexcept {
   if (!runner_ || !runner_->is_loaded())
     return 0;
-  return runner_->stats_.num_prompt_tokens;
+  return static_cast<int32_t>(runner_->stats_.num_prompt_tokens);
 }
 
 int32_t LLM::getVisualTokenCount() const {
   if (!runner_ || !runner_->is_loaded()) {
     return 0;
   }
-  return runner_->get_visual_token_count();
+  return static_cast<int32_t>(runner_->get_visual_token_count());
 }
 
 int32_t LLM::countTextTokens(std::string text) const {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h
index fcb93d0c18..5c9bc258d7 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h
@@ -30,8 +30,8 @@ class LLM : public BaseModel {
   void interrupt();
   void reset();
   void unload() noexcept;
-  size_t getGeneratedTokenCount() const noexcept;
-  size_t getPromptTokenCount() const noexcept;
+  int32_t getGeneratedTokenCount() const noexcept;
+  int32_t getPromptTokenCount() const noexcept;
   int32_t countTextTokens(std::string text) const;
   int32_t getVisualTokenCount() const;
   size_t getMemoryLowerBound() const noexcept;

From dab77d7c59f9ac1bc4e1d8ad397e7d04166b46d8 Mon Sep 17 00:00:00 2001
From: Bartosz Hanc <bartosz.hanc02@gmail.com>
Date: Mon, 23 Mar 2026 13:25:58 +0100
Subject: [PATCH 9/9] fix: disable model picker when generating

---
 apps/computer-vision/components/ModelPicker.tsx | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/apps/computer-vision/components/ModelPicker.tsx b/apps/computer-vision/components/ModelPicker.tsx
index dc76f5b171..991793768b 100644
--- a/apps/computer-vision/components/ModelPicker.tsx
+++ b/apps/computer-vision/components/ModelPicker.tsx
@@ -1,4 +1,4 @@
-import React, { useState } from 'react';
+import React, { useEffect, useState } from 'react';
 import {
   View,
   StyleSheet,
@@ -30,6 +30,10 @@ export function ModelPicker<T>({
   const [open, setOpen] = useState(false);
   const selected = models.find((m) => m.value === selectedModel);
 
+  useEffect(() => {
+    if (disabled) setOpen(false);
+  }, [disabled]);
+
   return (
     <View style={styles.container}>
       <TouchableOpacity