From 60e6809091312aca9607c4e50e0a0aebdd5d09bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Thu, 21 May 2026 17:59:15 +0200
Subject: [PATCH 1/2] fix(computer-vision): align vision_camera segmentation
 overlay with preview

Replace the unconditional sensor-native W/H swap with an orientation-aware
one, drop the unreachable `argmax.length === screenW * screenH` branch
(`runOnFrame` is called with `resizeToInput=false`, so the mask is always
at model output resolution), and draw the SkiaImage into the camera
preview's cover-fit rect with `fit="fill"` instead of cover-fitting (and
cropping) the square mask onto the whole canvas. Fixes #1158.
---
 .../vision_camera/tasks/SegmentationTask.tsx  | 59 ++++++++++++-------
 1 file changed, 39 insertions(+), 20 deletions(-)
diff --git a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
index b88e457b23..1465b88dcc 100644
--- a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
+++ b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
@@ -78,6 +78,7 @@ export default function SegmentationTask({
   }[activeModel];
 
   const [maskImage, setMaskImage] = useState<SkImage | null>(null);
+  const [imageSize, setImageSize] = useState({ width: 1, height: 1 });
   const lastFrameTimeRef = useRef(Date.now());
 
   useEffect(() => {
@@ -117,11 +118,12 @@ export default function SegmentationTask({
   const segRof = active.runOnFrame;
 
   const updateMask = useCallback(
-    (img: SkImage) => {
+    (p: { img: SkImage; screenW: number; screenH: number }) => {
       setMaskImage((prev) => {
         prev?.dispose();
-        return img;
+        return p.img;
       });
+      setImageSize({ width: p.screenW, height: p.screenH });
       const now = Date.now();
       const diff = now - lastFrameTimeRef.current;
       if (diff > 0) onFpsChange(Math.round(1000 / diff), diff);
@@ -151,18 +153,22 @@ export default function SegmentationTask({
           const result = segRof(frame, isFrontCamera, [], false);
           if (result?.ARGMAX) {
             const argmax: Int32Array = result.ARGMAX;
-            // Sensor frames are landscape-native, so width/height are swapped
-            // relative to portrait screen orientation.
-            const screenW = frame.height;
-            const screenH = frame.width;
-            const maskW =
-              argmax.length === screenW * screenH
-                ? screenW
-                : Math.round(Math.sqrt(argmax.length));
-            const maskH =
-              argmax.length === screenW * screenH
-                ? screenH
-                : Math.round(Math.sqrt(argmax.length));
+            // Native rotates the mask into screen-space (see
+            // `inverseRotateMat`). Derive screen-space dims from
+            // `frame.orientation`: portrait orientations ("left"/"right")
+            // swap sensor-native width/height, landscape ones keep them.
+            const orient = frame.orientation;
+            const isScreenPortrait = orient === 'left' || orient === 'right';
+            const screenW = isScreenPortrait ? frame.height : frame.width;
+            const screenH = isScreenPortrait ? frame.width : frame.height;
+            // Mask buffer dims: the C++ side returns the mask at model output
+            // resolution (the `resizeToInput=false` arg below). All built-in
+            // segmentation models output a square spatial map (e.g. 520×520),
+            // so sqrt(length) recovers the side. Non-square model outputs
+            // would need dims exposed from native.
+            const maskSide = Math.round(Math.sqrt(argmax.length));
+            const maskW = maskSide;
+            const maskH = maskSide;
             const pixels = new Uint8Array(maskW * maskH * 4);
             for (let i = 0; i < argmax.length; i++) {
               const color = colors[argmax[i]!] ?? [0, 0, 0, 0];
@@ -182,7 +188,7 @@ export default function SegmentationTask({
               skData,
               maskW * 4
             );
-            if (img) scheduleOnRN(updateMask, img);
+            if (img) scheduleOnRN(updateMask, { img, screenW, screenH });
           }
         } catch {
           // Frame may be disposed before processing completes — transient, safe to ignore.
@@ -200,16 +206,29 @@ export default function SegmentationTask({
 
   if (!maskImage) return null;
 
+  // Match the camera preview's cover-scale + center layout so the mask
+  // aligns pixel-for-pixel with what the user sees. `fit="fill"` lets the
+  // (square) mask stretch into the preview rect — which is computed in
+  // screen-space dims rather than the sensor-native ones.
+  const scale = Math.max(
+    canvasSize.width / imageSize.width,
+    canvasSize.height / imageSize.height
+  );
+  const dstW = imageSize.width * scale;
+  const dstH = imageSize.height * scale;
+  const offsetX = (canvasSize.width - dstW) / 2;
+  const offsetY = (canvasSize.height - dstH) / 2;
+
   return (
     <View style={StyleSheet.absoluteFill} pointerEvents="none">
       <Canvas style={StyleSheet.absoluteFill}>
         <SkiaImage
           image={maskImage}
-          fit="cover"
-          x={0}
-          y={0}
-          width={canvasSize.width}
-          height={canvasSize.height}
+          fit="fill"
+          x={offsetX}
+          y={offsetY}
+          width={dstW}
+          height={dstH}
         />
       </Canvas>
     </View>

From 6989c1d8109cebf04ae354d09fcb76ff138102ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Thu, 21 May 2026 22:16:08 +0200
Subject: [PATCH 2/2] fix(computer-vision): keep segmentation overlay aligned
 in landscape
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pin vision_camera's `orientationSource` to `'interface'` while the
segmentation task is active so the preview stays in the activity's
(portrait-locked) coord system — the same system the native side rotates
the mask into. Other tasks keep `'device'` since their coords (bboxes,
points) tolerate the device-rotated preview.

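With the preview pinned to the portrait coord system, also replace the
`frame.orientation`-based width/height swap introduced by the previous
commit with an unconditional swap of the sensor-native dims
(`screenW = frame.height`, `screenH = frame.width`), so the overlay rect
is always computed in the same portrait coord system as the preview.

The removal of the unreachable `argmax.length === screenW * screenH`
branch (`runOnFrame` is called with `resizeToInput=false`, so the mask is
at model output resolution) and the `fit="fill"` draw into the camera
preview's cover-fit rect already landed in the previous commit and are
unchanged here. Refs #1158.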
---
 .../app/vision_camera/index.tsx                |  9 ++++++++-
 .../vision_camera/tasks/SegmentationTask.tsx   | 18 ++++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx
index 7a399f443f..99fe0b1ac7 100644
--- a/apps/computer-vision/app/vision_camera/index.tsx
+++ b/apps/computer-vision/app/vision_camera/index.tsx
@@ -234,7 +234,14 @@ export default function VisionCameraScreen() {
         device={device}
         outputs={frameOutput ? [frameOutput] : []}
         isActive={isFocused}
-        orientationSource="device"
+        // Segmentation draws a 2D mask that the native side rotates into the
+        // activity's coord system (portrait, since the activity is locked).
+        // Pin the preview to that same coord system so mask + preview can't
+        // drift apart when the phone is tilted. Other tasks render coords
+        // (bboxes/points) and tolerate the device-rotated preview fine.
+        orientationSource={
+          activeTask === 'segmentation' ? 'interface' : 'device'
+        }
         onError={(e) => {
           console.warn('[Camera] onError', e);
           setError(e.message);
diff --git a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
index 1465b88dcc..8c499d977a 100644
--- a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
+++ b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
@@ -153,14 +153,16 @@ export default function SegmentationTask({
           const result = segRof(frame, isFrontCamera, [], false);
           if (result?.ARGMAX) {
             const argmax: Int32Array = result.ARGMAX;
-            // Native rotates the mask into screen-space (see
-            // `inverseRotateMat`). Derive screen-space dims from
-            // `frame.orientation`: portrait orientations ("left"/"right")
-            // swap sensor-native width/height, landscape ones keep them.
-            const orient = frame.orientation;
-            const isScreenPortrait = orient === 'left' || orient === 'right';
-            const screenW = isScreenPortrait ? frame.height : frame.width;
-            const screenH = isScreenPortrait ? frame.width : frame.height;
+            // Both the preview and the mask live in a portrait coord system:
+            // the activity is portrait-locked (so CameraX's PreviewView always
+            // renders the preview in portrait orientation regardless of how
+            // the device is physically tilted), and the native side runs
+            // `inverseRotateMat` which always converts the mask into the same
+            // portrait coord system. Treat sensor-native dims as portrait by
+            // swapping height/width — same convention as the sibling OCR and
+            // ObjectDetection tasks.
+            const screenW = frame.height;
+            const screenH = frame.width;
             // Mask buffer dims: the C++ side returns the mask at model output
             // resolution (the `resizeToInput=false` arg below). All built-in
             // segmentation models output a square spatial map (e.g. 520×520),