From 04146abb6c969e2eed1e730c85edf2c998f5296f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 15:30:41 +0000
Subject: [PATCH 1/5] Initial plan


From 698aaef19604d98df136a20d5030e57ab45ac166 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 16:00:33 +0000
Subject: [PATCH 2/5] Implement GPU support for optical flow, face blurring,
 and pose estimation

Agent-Logs-Url: https://github.com/fourMs/MGT-python/sessions/37e34660-f746-453a-b428-74fc3f53285f

Co-authored-by: alexarje <114316+alexarje@users.noreply.github.com>
---
 musicalgestures/__init__.py    |  1 +
 musicalgestures/_blurfaces.py  |  4 +-
 musicalgestures/_centerface.py | 11 ++++-
 musicalgestures/_flow.py       | 78 ++++++++++++++++++++++++++++++++--
 musicalgestures/_pose.py       |  9 +---
 musicalgestures/_utils.py      | 15 +++++++
 tests/test_flow.py             | 33 ++++++++++++++
 7 files changed, 139 insertions(+), 12 deletions(-)

diff --git a/musicalgestures/__init__.py b/musicalgestures/__init__.py
index 756e8b5..e08932f 100644
--- a/musicalgestures/__init__.py
+++ b/musicalgestures/__init__.py
@@ -14,6 +14,7 @@
     ffmpeg_cmd,
     get_length,
     generate_outfilename,
+    get_cuda_device_count,
 )
 from musicalgestures._mglist import MgList
 
diff --git a/musicalgestures/_blurfaces.py b/musicalgestures/_blurfaces.py
index 2ebca4b..f9d5b7a 100644
--- a/musicalgestures/_blurfaces.py
+++ b/musicalgestures/_blurfaces.py
@@ -80,6 +80,7 @@ def mg_blurfaces(self,
                  save_data=True, 
                  data_format='csv', 
                  color=(0, 0, 0), 
+                 use_gpu=True,
                  target_name=None, 
                  overwrite=False):
     """
@@ -101,6 +102,7 @@ def mg_blurfaces(self,
         save_data (bool, optional): Whether to save the scaled coordinates of the face mask (time (ms), x1, y1, x2, y2) for each frame to a file. Defaults to True.
         data_format (str, optional): Specifies format of blur_faces-data. Accepted values are 'csv', 'tsv' and 'txt'. For multiple output formats, use list, e.g. ['csv', 'txt']. Defaults to 'csv'.
         color (tuple, optional): Customized color of the rectangle boxes. Defaults to black (0, 0, 0).
+        use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration for face detection. Falls back to CPU automatically if CUDA is unavailable. Defaults to True.
         target_name (str, optional): Target output name. Defaults to None (which assumes that the input filename with the suffix "_blurred" should be used).
         overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -123,7 +125,7 @@ def mg_blurfaces(self,
     pb = MgProgressbar(total=self.length, prefix='Blurring faces:')
 
     # Create an instance of the CenterFace class
-    centerface = CenterFace()
+    centerface = CenterFace(use_gpu=use_gpu)
     output_stream = cv2.VideoWriter(target_name, cv2.VideoWriter_fourcc('M','J','P','G'), self.fps, (self.width, self.height))
     # Create an empty list to append the mask coordinates
     data = []
diff --git a/musicalgestures/_centerface.py b/musicalgestures/_centerface.py
index 85d9de6..0f2dc9c 100644
--- a/musicalgestures/_centerface.py
+++ b/musicalgestures/_centerface.py
@@ -3,15 +3,24 @@
 import numpy as np
 
 import musicalgestures
+from musicalgestures._utils import get_cuda_device_count
 
 class CenterFace(object):
     
-    def __init__(self, landmarks=True):
+    def __init__(self, landmarks=True, use_gpu=False):
 
         module_path = os.path.abspath(os.path.dirname(musicalgestures.__file__))
         
         self.landmarks = landmarks
         self.net = cv2.dnn.readNetFromONNX(module_path + '/models/centerface.onnx')
+
+        if use_gpu:
+            if get_cuda_device_count() > 0:
+                self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
+                self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
+            else:
+                print('OpenCV CUDA backend is unavailable. CenterFace will use CPU.')
+
         self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0
 
     def __call__(self, img, height, width, threshold=0.5):
diff --git a/musicalgestures/_flow.py b/musicalgestures/_flow.py
index 7cca289..fea2eea 100644
--- a/musicalgestures/_flow.py
+++ b/musicalgestures/_flow.py
@@ -47,6 +47,7 @@ def dense(
             angle_of_view=0, 
             scaledown=1,      
             skip_empty=False,
+            use_gpu=True,
             target_name=None,
             overwrite=False):
         """
@@ -68,6 +69,7 @@ def dense(
             angle_of_view (int, optional): angle of view of camera, for reporting flow in meters per second. Defaults to 0.
             scaledown (int, optional): factor to scaledown frame size of the video. Defaults to 1.
             skip_empty (bool, optional): If True, repeats previous frame in the output when encounters an empty frame. Defaults to False.
+            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.FarnebackOpticalFlow`. Falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. Defaults to True.
             target_name (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_flow_dense" should be used).
             overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -100,6 +102,28 @@ def dense(
 
         size = (int(width/scaledown), int(height/scaledown))
 
+        # Determine whether to use GPU-accelerated Farneback optical flow
+        from musicalgestures._utils import get_cuda_device_count
+        _use_gpu = False
+        farneback_gpu = None
+        if use_gpu:
+            if not hasattr(cv2.cuda, 'FarnebackOpticalFlow'):
+                print('cv2.cuda.FarnebackOpticalFlow is unavailable (requires opencv-contrib built with CUDA). Switching to CPU for dense optical flow.')
+            elif get_cuda_device_count() <= 0:
+                print('OpenCV CUDA backend is unavailable. Switching to CPU for dense optical flow.')
+            else:
+                _use_gpu = True
+                farneback_gpu = cv2.cuda.FarnebackOpticalFlow.create(
+                    numLevels=levels,
+                    pyrScale=pyr_scale,
+                    fastPyramids=False,
+                    winSize=winsize,
+                    numIters=iterations,
+                    polyN=poly_n,
+                    polySigma=poly_sigma,
+                    flags=flags,
+                )
+
         if velocity:
             pb = MgProgressbar(total=length, prefix='Rendering dense optical flow velocity:')
 
@@ -118,6 +142,10 @@ def dense(
 
         ret, frame1 = vidcap.read()
         prev_frame = cv2.cvtColor(cv2.resize(frame1, size), cv2.COLOR_BGR2GRAY)
+
+        if _use_gpu:
+            gpu_prev_frame = cv2.cuda_GpuMat()
+            gpu_prev_frame.upload(prev_frame)
         
         prev_rgb = None
         hsv = np.zeros_like(frame1)
@@ -134,7 +162,14 @@ def dense(
             if ret == True:
                 next_frame = cv2.cvtColor(cv2.resize(frame2, size), cv2.COLOR_BGR2GRAY)
 
-                flow = cv2.calcOpticalFlowFarneback(prev_frame, next_frame, None, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)
+                if _use_gpu:
+                    gpu_next_frame = cv2.cuda_GpuMat()
+                    gpu_next_frame.upload(next_frame)
+                    gpu_flow_result = farneback_gpu.calc(gpu_prev_frame, gpu_next_frame, None)
+                    flow = gpu_flow_result.download()
+                    gpu_prev_frame = gpu_next_frame
+                else:
+                    flow = cv2.calcOpticalFlowFarneback(prev_frame, next_frame, None, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)
 
                 if velocity:
                     # Cumulative sum of optical flow vectors        
@@ -285,6 +320,7 @@ def sparse(
             of_max_level=2,
             of_criteria=(cv2.TERM_CRITERIA_EPS |
                          cv2.TERM_CRITERIA_COUNT, 10, 0.03),
+            use_gpu=True,
             target_name=None,
             overwrite=False):
         """
@@ -299,6 +335,7 @@ def sparse(
             of_win_size (tuple, optional): Size of the search window at each pyramid level. Defaults to (15, 15).
             of_max_level (int, optional): 0-based maximal pyramid level number. If set to 0, pyramids are not used (single level), if set to 1, two levels are used, and so on. If pyramids are passed to input then the algorithm will use as many levels as pyramids have but no more than `maxLevel`. Defaults to 2.
             of_criteria (tuple, optional): Specifies the termination criteria of the iterative search algorithm (after the specified maximum number of iterations criteria.maxCount or when the search window moves by less than criteria.epsilon). Defaults to (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).
+            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.SparsePyrLKOpticalFlow`. Falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. Defaults to True.
             target_name (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_flow_sparse" should be used).
             overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -330,6 +367,24 @@ def sparse(
         height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
 
+        # Determine whether to use GPU-accelerated sparse optical flow
+        from musicalgestures._utils import get_cuda_device_count
+        _use_gpu = False
+        lk_gpu = None
+        if use_gpu:
+            if not hasattr(cv2.cuda, 'SparsePyrLKOpticalFlow'):
+                print('cv2.cuda.SparsePyrLKOpticalFlow is unavailable (requires opencv-contrib built with CUDA). Switching to CPU for sparse optical flow.')
+            elif get_cuda_device_count() <= 0:
+                print('OpenCV CUDA backend is unavailable. Switching to CPU for sparse optical flow.')
+            else:
+                _use_gpu = True
+                iters = of_criteria[1] if len(of_criteria) > 1 else 10
+                lk_gpu = cv2.cuda.SparsePyrLKOpticalFlow.create(
+                    winSize=of_win_size,
+                    maxLevel=of_max_level,
+                    iters=iters,
+                )
+
         pb = MgProgressbar(
             total=length, prefix='Rendering sparse optical flow video:')
 
@@ -362,6 +417,12 @@ def sparse(
         old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
         p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)
 
+        if _use_gpu:
+            gpu_old_gray = cv2.cuda_GpuMat()
+            gpu_old_gray.upload(old_gray)
+            gpu_p0 = cv2.cuda_GpuMat()
+            gpu_p0.upload(p0)
+
         # Create a mask image for drawing purposes
         mask = np.zeros_like(old_frame)
 
@@ -373,8 +434,16 @@ def sparse(
                 frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
 
                 # calculate optical flow
-                p1, st, err = cv2.calcOpticalFlowPyrLK(
-                    old_gray, frame_gray, p0, None, **lk_params)
+                if _use_gpu:
+                    gpu_frame_gray = cv2.cuda_GpuMat()
+                    gpu_frame_gray.upload(frame_gray)
+                    gpu_p1, gpu_st = lk_gpu.calc(gpu_old_gray, gpu_frame_gray, gpu_p0, None, None)
+                    p1 = gpu_p1.download()
+                    st = gpu_st.download()
+                    gpu_old_gray = gpu_frame_gray
+                else:
+                    p1, st, err = cv2.calcOpticalFlowPyrLK(
+                        old_gray, frame_gray, p0, None, **lk_params)
 
                 # Select good points
                 good_new = p1[st == 1]
@@ -400,6 +469,9 @@ def sparse(
                 # Now update the previous frame and previous points
                 old_gray = frame_gray.copy()
                 p0 = good_new.reshape(-1, 1, 2)
+                if _use_gpu:
+                    gpu_p0 = cv2.cuda_GpuMat()
+                    gpu_p0.upload(p0)
 
             else:
                 pb.progress(length)
diff --git a/musicalgestures/_pose.py b/musicalgestures/_pose.py
index fd60267..f2ec312 100644
--- a/musicalgestures/_pose.py
+++ b/musicalgestures/_pose.py
@@ -4,7 +4,7 @@
 import sys
 import numpy as np
 import pandas as pd
-from musicalgestures._utils import MgProgressbar, convert_to_avi, extract_wav, embed_audio_in_video, roundup, frame2ms, generate_outfilename, in_colab, ffmpeg_cmd
+from musicalgestures._utils import MgProgressbar, convert_to_avi, extract_wav, embed_audio_in_video, roundup, frame2ms, generate_outfilename, in_colab, get_cuda_device_count, ffmpeg_cmd
 import musicalgestures
 import itertools
 
@@ -104,12 +104,7 @@ def pose(
         print('Sorry, OpenCV GPU acceleration is not supported in Colab. Switching to CPU.')
         device = 'cpu'
     elif device == 'gpu':
-        cuda_devices = 0
-        try:
-            cuda_devices = cv2.cuda.getCudaEnabledDeviceCount()
-        except Exception:
-            cuda_devices = 0
-        if cuda_devices <= 0:
+        if get_cuda_device_count() <= 0:
             print('OpenCV CUDA backend is unavailable. Switching to CPU.')
             device = 'cpu'
 
diff --git a/musicalgestures/_utils.py b/musicalgestures/_utils.py
index 0c8bb6e..f47ed2d 100644
--- a/musicalgestures/_utils.py
+++ b/musicalgestures/_utils.py
@@ -1616,6 +1616,21 @@ def unwrap_str(string):
         return string
 
 
+def get_cuda_device_count():
+    """
+    Return how many CUDA-capable GPU devices OpenCV reports; a failing query yields 0.
+
+    Returns:
+        int: Number of available CUDA devices, or 0 if the OpenCV CUDA
+             module is unavailable or no devices are detected.
+    """
+    import cv2
+    try:
+        return cv2.cuda.getCudaEnabledDeviceCount()
+    except Exception:
+        return 0
+
+
 def in_colab():
     """
     Check's if the environment is a Google Colab document.
diff --git a/tests/test_flow.py b/tests/test_flow.py
index e7d692d..5e39337 100644
--- a/tests/test_flow.py
+++ b/tests/test_flow.py
@@ -54,6 +54,19 @@ def test_with_target_name(self, testvideo_avi):
         assert type(result) == musicalgestures.MgVideo
         assert os.path.isfile(result.filename) == True
 
+    def test_use_gpu_true(self, testvideo_avi):
+        # Requesting the GPU must still succeed: without CUDA, dense flow runs on the CPU
+        video = musicalgestures.MgVideo(testvideo_avi)
+        rendered = video.flow.dense(overwrite=True, use_gpu=True)
+        assert type(rendered) is musicalgestures.MgVideo
+        assert os.path.isfile(rendered.filename)
+
+    def test_use_gpu_false(self, testvideo_avi):  # explicit CPU request
+        video = musicalgestures.MgVideo(testvideo_avi)
+        rendered = video.flow.dense(overwrite=True, use_gpu=False)
+        assert type(rendered) is musicalgestures.MgVideo
+        assert os.path.isfile(rendered.filename)
+
 
 class Test_flow_sparse:
     def test_normal_case(self, testvideo_avi):
@@ -80,3 +93,23 @@ def test_with_target_name(self, testvideo_avi):
         result = mg.flow.sparse(target_name=target_name, overwrite=True)
         assert type(result) == musicalgestures.MgVideo
         assert os.path.isfile(result.filename) == True
+
+    def test_use_gpu_true(self, testvideo_avi):
+        # Requesting the GPU must still succeed: without CUDA, sparse flow runs on the CPU
+        video = musicalgestures.MgVideo(testvideo_avi)
+        rendered = video.flow.sparse(overwrite=True, use_gpu=True)
+        assert type(rendered) is musicalgestures.MgVideo
+        assert os.path.isfile(rendered.filename)
+
+    def test_use_gpu_false(self, testvideo_avi):  # explicit CPU request
+        video = musicalgestures.MgVideo(testvideo_avi)
+        rendered = video.flow.sparse(overwrite=True, use_gpu=False)
+        assert type(rendered) is musicalgestures.MgVideo
+        assert os.path.isfile(rendered.filename)
+
+
+class Test_get_cuda_device_count:  # the CUDA probe must always give a non-negative int
+    def test_returns_int(self):
+        result = musicalgestures.get_cuda_device_count()
+        assert isinstance(result, int)
+        assert result >= 0

From 841efa615b09e58c1197c5c4ef09e689d6708c9d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 16:03:34 +0000
Subject: [PATCH 3/5] Address code review: improve docstrings and GPU memory
 management

Agent-Logs-Url: https://github.com/fourMs/MGT-python/sessions/37e34660-f746-453a-b428-74fc3f53285f

Co-authored-by: alexarje <114316+alexarje@users.noreply.github.com>
---
 musicalgestures/_flow.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/musicalgestures/_flow.py b/musicalgestures/_flow.py
index fea2eea..a0733c9 100644
--- a/musicalgestures/_flow.py
+++ b/musicalgestures/_flow.py
@@ -69,7 +69,7 @@ def dense(
             angle_of_view (int, optional): angle of view of camera, for reporting flow in meters per second. Defaults to 0.
             scaledown (int, optional): factor to scaledown frame size of the video. Defaults to 1.
             skip_empty (bool, optional): If True, repeats previous frame in the output when encounters an empty frame. Defaults to False.
-            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.FarnebackOpticalFlow`. Falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. Defaults to True.
+            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.FarnebackOpticalFlow`. When `True`, falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. When `False`, CPU processing is used unconditionally. Defaults to True.
             target_name (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_flow_dense" should be used).
             overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -145,6 +145,7 @@ def dense(
 
         if _use_gpu:
             gpu_prev_frame = cv2.cuda_GpuMat()
+            gpu_next_frame = cv2.cuda_GpuMat()
             gpu_prev_frame.upload(prev_frame)
         
         prev_rgb = None
@@ -163,11 +164,12 @@ def dense(
                 next_frame = cv2.cvtColor(cv2.resize(frame2, size), cv2.COLOR_BGR2GRAY)
 
                 if _use_gpu:
-                    gpu_next_frame = cv2.cuda_GpuMat()
                     gpu_next_frame.upload(next_frame)
                     gpu_flow_result = farneback_gpu.calc(gpu_prev_frame, gpu_next_frame, None)
                     flow = gpu_flow_result.download()
-                    gpu_prev_frame = gpu_next_frame
+                    # Swap references so gpu_next_frame becomes the next prev without
+                    # allocating a new GpuMat object each frame
+                    gpu_prev_frame, gpu_next_frame = gpu_next_frame, gpu_prev_frame
                 else:
                     flow = cv2.calcOpticalFlowFarneback(prev_frame, next_frame, None, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)
 
@@ -335,7 +337,7 @@ def sparse(
             of_win_size (tuple, optional): Size of the search window at each pyramid level. Defaults to (15, 15).
             of_max_level (int, optional): 0-based maximal pyramid level number. If set to 0, pyramids are not used (single level), if set to 1, two levels are used, and so on. If pyramids are passed to input then the algorithm will use as many levels as pyramids have but no more than `maxLevel`. Defaults to 2.
             of_criteria (tuple, optional): Specifies the termination criteria of the iterative search algorithm (after the specified maximum number of iterations criteria.maxCount or when the search window moves by less than criteria.epsilon). Defaults to (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).
-            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.SparsePyrLKOpticalFlow`. Falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. Defaults to True.
+            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.SparsePyrLKOpticalFlow`. When `True`, falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. When `False`, CPU processing is used unconditionally. Defaults to True.
             target_name (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_flow_sparse" should be used).
             overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -419,6 +421,7 @@ def sparse(
 
         if _use_gpu:
             gpu_old_gray = cv2.cuda_GpuMat()
+            gpu_frame_gray = cv2.cuda_GpuMat()
             gpu_old_gray.upload(old_gray)
             gpu_p0 = cv2.cuda_GpuMat()
             gpu_p0.upload(p0)
@@ -435,12 +438,12 @@ def sparse(
 
                 # calculate optical flow
                 if _use_gpu:
-                    gpu_frame_gray = cv2.cuda_GpuMat()
                     gpu_frame_gray.upload(frame_gray)
                     gpu_p1, gpu_st = lk_gpu.calc(gpu_old_gray, gpu_frame_gray, gpu_p0, None, None)
                     p1 = gpu_p1.download()
                     st = gpu_st.download()
-                    gpu_old_gray = gpu_frame_gray
+                    # Swap references to avoid allocating a new GpuMat each frame
+                    gpu_old_gray, gpu_frame_gray = gpu_frame_gray, gpu_old_gray
                 else:
                     p1, st, err = cv2.calcOpticalFlowPyrLK(
                         old_gray, frame_gray, p0, None, **lk_params)
@@ -470,7 +473,6 @@ def sparse(
                 old_gray = frame_gray.copy()
                 p0 = good_new.reshape(-1, 1, 2)
                 if _use_gpu:
-                    gpu_p0 = cv2.cuda_GpuMat()
                     gpu_p0.upload(p0)
 
             else:

From 27a32c53d036efc0f69522a6bb32d8bc39a5fa4b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 13 Apr 2026 16:05:50 +0000
Subject: [PATCH 4/5] Clarify GPU GpuMat swap comments in flow.py

Agent-Logs-Url: https://github.com/fourMs/MGT-python/sessions/37e34660-f746-453a-b428-74fc3f53285f

Co-authored-by: alexarje <114316+alexarje@users.noreply.github.com>
---
 musicalgestures/_flow.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/musicalgestures/_flow.py b/musicalgestures/_flow.py
index a0733c9..6d33e36 100644
--- a/musicalgestures/_flow.py
+++ b/musicalgestures/_flow.py
@@ -167,8 +167,8 @@ def dense(
                     gpu_next_frame.upload(next_frame)
                     gpu_flow_result = farneback_gpu.calc(gpu_prev_frame, gpu_next_frame, None)
                     flow = gpu_flow_result.download()
-                    # Swap references so gpu_next_frame becomes the next prev without
-                    # allocating a new GpuMat object each frame
+                    # Swap references so gpu_next_frame becomes gpu_prev_frame for the
+                    # next iteration without allocating a new GpuMat object each frame
                     gpu_prev_frame, gpu_next_frame = gpu_next_frame, gpu_prev_frame
                 else:
                     flow = cv2.calcOpticalFlowFarneback(prev_frame, next_frame, None, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)
@@ -442,7 +442,8 @@ def sparse(
                     gpu_p1, gpu_st = lk_gpu.calc(gpu_old_gray, gpu_frame_gray, gpu_p0, None, None)
                     p1 = gpu_p1.download()
                     st = gpu_st.download()
-                    # Swap references to avoid allocating a new GpuMat each frame
+                    # Swap references so current frame becomes old frame for next
+                    # iteration, avoiding new GpuMat allocation each frame
                     gpu_old_gray, gpu_frame_gray = gpu_frame_gray, gpu_old_gray
                 else:
                     p1, st, err = cv2.calcOpticalFlowPyrLK(

From 87ee4ed6091ab035aae09dd90081d91734df4cd7 Mon Sep 17 00:00:00 2001
From: Alexander Refsum Jensenius <arjensenius@gmail.com>
Date: Mon, 13 Apr 2026 19:23:17 +0200
Subject: [PATCH 5/5] Fix review issues: safe cv2.cuda guard, use_gpu defaults
 to False, move imports to module level, add blur_faces and pose GPU tests

---
 musicalgestures/_blurfaces.py |  4 ++--
 musicalgestures/_flow.py      | 16 +++++++---------
 musicalgestures/_utils.py     |  1 -
 tests/test_flow.py            | 30 ++++++++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/musicalgestures/_blurfaces.py b/musicalgestures/_blurfaces.py
index f9d5b7a..e558e85 100644
--- a/musicalgestures/_blurfaces.py
+++ b/musicalgestures/_blurfaces.py
@@ -80,7 +80,7 @@ def mg_blurfaces(self,
                  save_data=True, 
                  data_format='csv', 
                  color=(0, 0, 0), 
-                 use_gpu=True,
+                 use_gpu=False,
                  target_name=None, 
                  overwrite=False):
     """
@@ -102,7 +102,7 @@ def mg_blurfaces(self,
         save_data (bool, optional): Whether to save the scaled coordinates of the face mask (time (ms), x1, y1, x2, y2) for each frame to a file. Defaults to True.
         data_format (str, optional): Specifies format of blur_faces-data. Accepted values are 'csv', 'tsv' and 'txt'. For multiple output formats, use list, e.g. ['csv', 'txt']. Defaults to 'csv'.
         color (tuple, optional): Customized color of the rectangle boxes. Defaults to black (0, 0, 0).
-        use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration for face detection. Falls back to CPU automatically if CUDA is unavailable. Defaults to True.
+        use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration for face detection. Falls back to CPU automatically if CUDA is unavailable. Defaults to False.
         target_name (str, optional): Target output name. Defaults to None (which assumes that the input filename with the suffix "_blurred" should be used).
         overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
diff --git a/musicalgestures/_flow.py b/musicalgestures/_flow.py
index 6d33e36..63b0ed4 100644
--- a/musicalgestures/_flow.py
+++ b/musicalgestures/_flow.py
@@ -7,7 +7,7 @@
 from scipy.stats import entropy
 
 import musicalgestures
-from musicalgestures._utils import MgFigure, extract_wav, embed_audio_in_video, MgProgressbar, convert_to_avi, generate_outfilename, ffmpeg_cmd
+from musicalgestures._utils import MgFigure, extract_wav, embed_audio_in_video, MgProgressbar, convert_to_avi, generate_outfilename, ffmpeg_cmd, get_cuda_device_count
 
 
 class Flow:
@@ -47,7 +47,7 @@ def dense(
             angle_of_view=0, 
             scaledown=1,      
             skip_empty=False,
-            use_gpu=True,
+            use_gpu=False,
             target_name=None,
             overwrite=False):
         """
@@ -69,7 +69,7 @@ def dense(
             angle_of_view (int, optional): angle of view of camera, for reporting flow in meters per second. Defaults to 0.
             scaledown (int, optional): factor to scaledown frame size of the video. Defaults to 1.
             skip_empty (bool, optional): If True, repeats previous frame in the output when encounters an empty frame. Defaults to False.
-            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.FarnebackOpticalFlow`. When `True`, falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. When `False`, CPU processing is used unconditionally. Defaults to True.
+            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.FarnebackOpticalFlow`. When `True`, falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. When `False`, CPU processing is used unconditionally. Defaults to False.
             target_name (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_flow_dense" should be used).
             overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -103,11 +103,10 @@ def dense(
         size = (int(width/scaledown), int(height/scaledown))
 
         # Determine whether to use GPU-accelerated Farneback optical flow
-        from musicalgestures._utils import get_cuda_device_count
         _use_gpu = False
         farneback_gpu = None
         if use_gpu:
-            if not hasattr(cv2.cuda, 'FarnebackOpticalFlow'):
+            if not hasattr(cv2, 'cuda') or not hasattr(cv2.cuda, 'FarnebackOpticalFlow'):
                 print('cv2.cuda.FarnebackOpticalFlow is unavailable (requires opencv-contrib built with CUDA). Switching to CPU for dense optical flow.')
             elif get_cuda_device_count() <= 0:
                 print('OpenCV CUDA backend is unavailable. Switching to CPU for dense optical flow.')
@@ -322,7 +321,7 @@ def sparse(
             of_max_level=2,
             of_criteria=(cv2.TERM_CRITERIA_EPS |
                          cv2.TERM_CRITERIA_COUNT, 10, 0.03),
-            use_gpu=True,
+            use_gpu=False,
             target_name=None,
             overwrite=False):
         """
@@ -337,7 +336,7 @@ def sparse(
             of_win_size (tuple, optional): Size of the search window at each pyramid level. Defaults to (15, 15).
             of_max_level (int, optional): 0-based maximal pyramid level number. If set to 0, pyramids are not used (single level), if set to 1, two levels are used, and so on. If pyramids are passed to input then the algorithm will use as many levels as pyramids have but no more than `maxLevel`. Defaults to 2.
             of_criteria (tuple, optional): Specifies the termination criteria of the iterative search algorithm (after the specified maximum number of iterations criteria.maxCount or when the search window moves by less than criteria.epsilon). Defaults to (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).
-            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.SparsePyrLKOpticalFlow`. When `True`, falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. When `False`, CPU processing is used unconditionally. Defaults to True.
+            use_gpu (bool, optional): Whether to attempt GPU (CUDA) acceleration using `cv2.cuda.SparsePyrLKOpticalFlow`. When `True`, falls back to CPU automatically if CUDA is unavailable or the required OpenCV CUDA modules are not installed. When `False`, CPU processing is used unconditionally. Defaults to False.
             target_name (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_flow_sparse" should be used).
             overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
 
@@ -370,11 +369,10 @@ def sparse(
         length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
 
         # Determine whether to use GPU-accelerated sparse optical flow
-        from musicalgestures._utils import get_cuda_device_count
         _use_gpu = False
         lk_gpu = None
         if use_gpu:
-            if not hasattr(cv2.cuda, 'SparsePyrLKOpticalFlow'):
+            if not hasattr(cv2, 'cuda') or not hasattr(cv2.cuda, 'SparsePyrLKOpticalFlow'):
                 print('cv2.cuda.SparsePyrLKOpticalFlow is unavailable (requires opencv-contrib built with CUDA). Switching to CPU for sparse optical flow.')
             elif get_cuda_device_count() <= 0:
                 print('OpenCV CUDA backend is unavailable. Switching to CPU for sparse optical flow.')
diff --git a/musicalgestures/_utils.py b/musicalgestures/_utils.py
index f47ed2d..18bd360 100644
--- a/musicalgestures/_utils.py
+++ b/musicalgestures/_utils.py
@@ -1624,7 +1624,6 @@ def get_cuda_device_count():
         int: Number of available CUDA devices, or 0 if the OpenCV CUDA
              module is unavailable or no devices are detected.
     """
-    import cv2
     try:
         return cv2.cuda.getCudaEnabledDeviceCount()
     except Exception:
diff --git a/tests/test_flow.py b/tests/test_flow.py
index 5e39337..76a74e8 100644
--- a/tests/test_flow.py
+++ b/tests/test_flow.py
@@ -113,3 +113,33 @@ def test_returns_int(self):
         result = musicalgestures.get_cuda_device_count()
         assert isinstance(result, int)
         assert result >= 0
+
+
+class Test_blur_faces_gpu:  # blur_faces must produce a video file for either use_gpu value
+    def test_use_gpu_false(self, testvideo_avi):
+        video = musicalgestures.MgVideo(testvideo_avi)
+        blurred = video.blur_faces(overwrite=True, use_gpu=False)
+        assert type(blurred) is musicalgestures.MgVideo
+        assert os.path.isfile(blurred.filename)
+
+    def test_use_gpu_true(self, testvideo_avi):
+        # Requesting the GPU must still succeed: without CUDA, face detection runs on the CPU
+        video = musicalgestures.MgVideo(testvideo_avi)
+        blurred = video.blur_faces(overwrite=True, use_gpu=True)
+        assert type(blurred) is musicalgestures.MgVideo
+        assert os.path.isfile(blurred.filename)
+
+
+class Test_pose_gpu:  # pose must produce a video file for either device setting
+    def test_device_cpu(self, testvideo_avi):
+        video = musicalgestures.MgVideo(testvideo_avi)
+        posed = video.pose(overwrite=True, device='cpu')
+        assert type(posed) is musicalgestures.MgVideo
+        assert os.path.isfile(posed.filename)
+
+    def test_device_gpu_fallback(self, testvideo_avi):
+        # Requesting device='gpu' must still succeed: without CUDA, pose switches to the CPU
+        video = musicalgestures.MgVideo(testvideo_avi)
+        posed = video.pose(overwrite=True, device='gpu')
+        assert type(posed) is musicalgestures.MgVideo
+        assert os.path.isfile(posed.filename)