From 60e6809091312aca9607c4e50e0a0aebdd5d09bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Thu, 21 May 2026 17:59:15 +0200 Subject: [PATCH 1/2] fix(computer-vision): align vision_camera segmentation overlay with preview Replace the unconditional sensor-native W/H swap with an orientation-aware one, drop the unreachable `argmax.length === screenW * screenH` branch (runOnFrame is called with resizeToInput=false, so the mask is always at model output resolution), and draw the SkiaImage into the camera preview's cover-fit rect with fit="fill" instead of stretching it onto the whole portrait canvas. Fixes #1158. --- .../vision_camera/tasks/SegmentationTask.tsx | 59 ++++++++++++------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx index b88e457b23..1465b88dcc 100644 --- a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx @@ -78,6 +78,7 @@ export default function SegmentationTask({ }[activeModel]; const [maskImage, setMaskImage] = useState(null); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); const lastFrameTimeRef = useRef(Date.now()); useEffect(() => { @@ -117,11 +118,12 @@ export default function SegmentationTask({ const segRof = active.runOnFrame; const updateMask = useCallback( - (img: SkImage) => { + (p: { img: SkImage; screenW: number; screenH: number }) => { setMaskImage((prev) => { prev?.dispose(); - return img; + return p.img; }); + setImageSize({ width: p.screenW, height: p.screenH }); const now = Date.now(); const diff = now - lastFrameTimeRef.current; if (diff > 0) onFpsChange(Math.round(1000 / diff), diff); @@ -151,18 +153,22 @@ export default function SegmentationTask({ const result = segRof(frame, isFrontCamera, [], false); if (result?.ARGMAX) { const argmax: Int32Array = result.ARGMAX; - // Sensor frames are landscape-native, so width/height are swapped - // relative to portrait screen orientation. - const screenW = frame.height; - const screenH = frame.width; - const maskW = - argmax.length === screenW * screenH - ? screenW - : Math.round(Math.sqrt(argmax.length)); - const maskH = - argmax.length === screenW * screenH - ? screenH - : Math.round(Math.sqrt(argmax.length)); + // Native rotates the mask into screen-space (see + // `inverseRotateMat`). Derive screen-space dims from + // `frame.orientation`: portrait orientations ("left"/"right") + // swap sensor-native width/height, landscape ones keep them. + const orient = frame.orientation; + const isScreenPortrait = orient === 'left' || orient === 'right'; + const screenW = isScreenPortrait ? frame.height : frame.width; + const screenH = isScreenPortrait ? frame.width : frame.height; + // Mask buffer dims: the C++ side returns the mask at model output + // resolution (the `resizeToInput=false` arg below). All built-in + // segmentation models output a square spatial map (e.g. 520×520), + // so sqrt(length) recovers the side. Non-square model outputs + // would need dims exposed from native. + const maskSide = Math.round(Math.sqrt(argmax.length)); + const maskW = maskSide; + const maskH = maskSide; const pixels = new Uint8Array(maskW * maskH * 4); for (let i = 0; i < argmax.length; i++) { const color = colors[argmax[i]!] ?? [0, 0, 0, 0]; @@ -182,7 +188,7 @@ export default function SegmentationTask({ skData, maskW * 4 ); - if (img) scheduleOnRN(updateMask, img); + if (img) scheduleOnRN(updateMask, { img, screenW, screenH }); } } catch { // Frame may be disposed before processing completes — transient, safe to ignore. @@ -200,16 +206,29 @@ export default function SegmentationTask({ if (!maskImage) return null; + // Match the camera preview's cover-scale + center layout so the mask + // aligns pixel-for-pixel with what the user sees. `fit="fill"` lets the + // (square) mask stretch into the preview rect — which is computed in + // screen-space dims rather than the sensor-native ones. + const scale = Math.max( + canvasSize.width / imageSize.width, + canvasSize.height / imageSize.height + ); + const dstW = imageSize.width * scale; + const dstH = imageSize.height * scale; + const offsetX = (canvasSize.width - dstW) / 2; + const offsetY = (canvasSize.height - dstH) / 2; + return ( From 6989c1d8109cebf04ae354d09fcb76ff138102ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= Date: Thu, 21 May 2026 22:16:08 +0200 Subject: [PATCH 2/2] fix(computer-vision): keep segmentation overlay aligned in landscape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin vision_camera's `orientationSource` to `'interface'` while the segmentation task is active so the preview stays in the activity's (portrait-locked) coord system — the same system the native side rotates the mask into. Other tasks keep `'device'` since their coords (bboxes, points) tolerate the device-rotated preview. Also drop the unreachable `argmax.length === screenW * screenH` branch (`runOnFrame` is called with `resizeToInput=false`, so the mask is at model output resolution) and draw the SkiaImage into the camera preview's cover-fit rect with `fit="fill"` instead of stretching it across the whole portrait canvas. Refs #1158. --- .../app/vision_camera/index.tsx | 9 ++++++++- .../vision_camera/tasks/SegmentationTask.tsx | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index 7a399f443f..99fe0b1ac7 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -234,7 +234,14 @@ export default function VisionCameraScreen() { device={device} outputs={frameOutput ? [frameOutput] : []} isActive={isFocused} - orientationSource="device" + // Segmentation draws a 2D mask that the native side rotates into the + // activity's coord system (portrait, since the activity is locked). + // Pin the preview to that same coord system so mask + preview can't + // drift apart when the phone is tilted. Other tasks render coords + // (bboxes/points) and tolerate the device-rotated preview fine. + orientationSource={ + activeTask === 'segmentation' ? 'interface' : 'device' + } onError={(e) => { console.warn('[Camera] onError', e); setError(e.message); diff --git a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx index 1465b88dcc..8c499d977a 100644 --- a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx @@ -153,14 +153,16 @@ export default function SegmentationTask({ const result = segRof(frame, isFrontCamera, [], false); if (result?.ARGMAX) { const argmax: Int32Array = result.ARGMAX; - // Native rotates the mask into screen-space (see - // `inverseRotateMat`). Derive screen-space dims from - // `frame.orientation`: portrait orientations ("left"/"right") - // swap sensor-native width/height, landscape ones keep them. - const orient = frame.orientation; - const isScreenPortrait = orient === 'left' || orient === 'right'; - const screenW = isScreenPortrait ? frame.height : frame.width; - const screenH = isScreenPortrait ? frame.width : frame.height; + // Both the preview and the mask live in a portrait coord system: + // the activity is portrait-locked (so CameraX's PreviewView always + // renders the preview in portrait orientation regardless of how + // the device is physically tilted), and the native side runs + // `inverseRotateMat` which always converts the mask into the same + // portrait coord system. Treat sensor-native dims as portrait by + // swapping height/width — same convention as the sibling OCR and + // ObjectDetection tasks. + const screenW = frame.height; + const screenH = frame.width; // Mask buffer dims: the C++ side returns the mask at model output // resolution (the `resizeToInput=false` arg below). All built-in // segmentation models output a square spatial map (e.g. 520×520),