diff --git a/docs/musicalgestures/_pose.md b/docs/musicalgestures/_pose.md
index 61d9b73..200dcc0 100644
--- a/docs/musicalgestures/_pose.md
+++ b/docs/musicalgestures/_pose.md
@@ -18,7 +18,7 @@ Helper function to automatically download model (.caffemodel) files.
 
 ## pose
 
-[[find in source code]](https://github.com/fourMs/MGT-python/blob/master/musicalgestures/_pose.py#L14)
+[[find in source code]](https://github.com/fourMs/MGT-python/blob/master/musicalgestures/_pose.py#L30)
 
 ```python
 def pose(
@@ -37,15 +37,20 @@
 ```
 
 Renders a video with the pose estimation (aka. "keypoint detection" or "skeleton tracking") overlaid on it.
-Outputs the predictions in a text file containing the normalized x and y coordinates of each keypoints
-(default format is csv). Uses models from the [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) project.
+Outputs the predictions in a text file containing the normalized x and y coordinates of each keypoint
+(default format is csv).
+
+Supports two backends:
+
+- **MediaPipe** (`model='mediapipe'`): Uses Google's MediaPipe Pose, which detects 33 landmarks entirely on CPU. Requires the optional `mediapipe` package (`pip install musicalgestures[pose]`). The model file (~8–28 MB) is auto-downloaded on first use and cached in `musicalgestures/models/`.
+- **OpenPose** (`model='body_25'`, `'coco'`, or `'mpi'`): Uses Caffe-based OpenPose models. Model weights (~200 MB) are downloaded on first use.
 
 #### Arguments
 
-- `model` *str, optional* - 'body_25' loads the model trained on the BODY_25 dataset, 'mpi' loads the model trained on the Multi-Person Dataset (MPII), 'coco' loads one trained on the COCO dataset. The BODY_25 model outputs 25 points, the MPII model outputs 15 points, while the COCO model produces 18 points. Defaults to 'body_25'.
-- `device` *str, optional* - Sets the backend to use for the neural network ('cpu' or 'gpu'). Defaults to 'gpu'.
+- `model` *str, optional* - Pose model to use. `'mediapipe'` uses MediaPipe Pose (33 landmarks, model auto-downloaded on first use). `'body_25'` loads the OpenPose BODY_25 model (25 keypoints), `'mpi'` loads the MPII model (15 keypoints), `'coco'` loads the COCO model (18 keypoints). Defaults to 'body_25'.
+- `device` *str, optional* - Sets the backend to use for the neural network ('cpu' or 'gpu'). Ignored when `model='mediapipe'` (MediaPipe always runs on CPU). Defaults to 'gpu'.
 - `threshold` *float, optional* - The normalized confidence threshold that decides whether we keep or discard a predicted point. Discarded points get substituted with (0, 0) in the output data. Defaults to 0.1.
-- `downsampling_factor` *int, optional* - Decides how much we downsample the video before we pass it to the neural network. For example `downsampling_factor=4` means that the input to the network is one-fourth the resolution of the source video. Heaviver downsampling reduces rendering time but produces lower quality pose estimation. Defaults to 2.
+- `downsampling_factor` *int, optional* - Decides how much we downsample the video before we pass it to the neural network. For example, `downsampling_factor=4` means that the input to the network is one-fourth the resolution of the source video. Heavier downsampling reduces rendering time but lowers pose estimation quality. Ignored when `model='mediapipe'`. Defaults to 2.
 - `save_data` *bool, optional* - Whether we save the predicted pose data to a file. Defaults to True.
 - `data_format` *str, optional* - Specifies format of pose-data. Accepted values are 'csv', 'tsv' and 'txt'. For multiple output formats, use list, eg. ['csv', 'txt']. Defaults to 'csv'.
 - `save_video` *bool, optional* - Whether we save the video with the estimated pose overlaid on it. Defaults to True.
diff --git a/docs/musicalgestures/_pose_estimator.md b/docs/musicalgestures/_pose_estimator.md
index 2ac7bb8..50b6b37 100644
--- a/docs/musicalgestures/_pose_estimator.md
+++ b/docs/musicalgestures/_pose_estimator.md
@@ -26,7 +26,7 @@ This module provides:
 * class `PoseEstimator` – an abstract base class (ABC) defining the
   common interface that all pose backends must implement.
 * class `MediaPipePoseEstimator` – a concrete backend powered by Google
-  MediaPipe Pose (33 landmarks, CPU-friendly, zero model download).
+  MediaPipe Pose (33 landmarks, CPU-friendly, auto-downloads model on first use).
 * class `OpenPosePoseEstimator` – a thin wrapper around the legacy
   OpenPose / Caffe-model implementation already present in
   :mod:[Pose](_pose.md#pose).
@@ -56,30 +56,30 @@ class MediaPipePoseEstimator(PoseEstimator):
     model_complexity: int = 1,
     min_detection_confidence: float = 0.5,
     min_tracking_confidence: float = 0.5,
-    static_image_mode: bool = False,
 ) -> None:
 ```
 
-Pose estimator backed by Google MediaPipe Pose.
+Pose estimator backed by Google MediaPipe Pose (Tasks API).
 
-Requires the optional ``mediapipe`` package
+Requires the optional ``mediapipe>=0.10`` package
 
 ```python
 pip install musicalgestures[pose]
 ```
 
+The first time you use a given complexity level, the corresponding
+`.task` model file (~8–28 MB) is downloaded from Google's model
+storage and cached in `musicalgestures/models/`.
+
 Parameters
 ----------
 model_complexity:
-    MediaPipe model complexity (0, 1, or 2). Higher = more accurate
-    but slower. Default: 1.
+    MediaPipe model complexity (0 = lite, 1 = full, 2 = heavy).
+    Higher values are more accurate but slower. Default: 1.
 min_detection_confidence:
     Minimum confidence for initial body detection. Default: 0.5.
 min_tracking_confidence:
     Minimum confidence for landmark tracking. Default: 0.5.
-static_image_mode:
-    If *True*, treat every frame as a static image (no tracking).
-    Default: False.
 
 Examples
 --------
diff --git a/musicalgestures/_pose.py b/musicalgestures/_pose.py
index fd60267..30daea0 100644
--- a/musicalgestures/_pose.py
+++ b/musicalgestures/_pose.py
@@ -10,6 +10,22 @@
 # implementation mainly inspired by: https://github.com/spmallick/learnopencv/blob/master/OpenPose/OpenPoseVideo.py
 
 
+# MediaPipe Pose skeleton connections (pairs of landmark indices)
+MEDIAPIPE_POSE_CONNECTIONS = [
+    (0, 1), (1, 2), (2, 3), (3, 7),
+    (0, 4), (4, 5), (5, 6), (6, 8),
+    (9, 10),
+    (11, 12),
+    (11, 13), (13, 15),
+    (12, 14), (14, 16),
+    (15, 17), (15, 19), (15, 21), (17, 19),
+    (16, 18), (16, 20), (16, 22), (18, 20),
+    (11, 23), (12, 24),
+    (23, 24),
+    (23, 25), (25, 27), (27, 29), (27, 31), (29, 31),
+    (24, 26), (26, 28), (28, 30), (28, 32), (30, 32),
+]
+
 
 def pose(
         self,
@@ -24,26 +40,61 @@
         target_name_data=None,
         overwrite=False):
     """
-    Renders a video with the pose estimation (aka. "keypoint detection" or "skeleton tracking") overlaid on it.
-    Outputs the predictions in a text file containing the normalized x and y coordinates of each keypoints
-    (default format is csv). Uses models from the [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) project.
+    Renders a video with the pose estimation (aka. "keypoint detection" or "skeleton tracking") overlaid on it.
+    Outputs the predictions in a text file containing the normalized x and y coordinates of each keypoint
+    (default format is csv).
+
+    Supports two backends:
+
+    * **MediaPipe** (``model='mediapipe'``): Uses Google's MediaPipe Pose, which detects 33
+      landmarks entirely on CPU. Requires the optional ``mediapipe`` package
+      (``pip install musicalgestures[pose]``). On first use, the model file
+      (~8–28 MB) is downloaded automatically and cached in ``musicalgestures/models/``.
+    * **OpenPose** (``model='body_25'``, ``'coco'``, or ``'mpi'``): Uses Caffe-based OpenPose
+      models. Model weights (~200 MB) are downloaded on first use.
 
     Args:
-        model (str, optional): 'body_25' loads the model trained on the BODY_25 dataset, 'mpi' loads the model trained on the Multi-Person Dataset (MPII), 'coco' loads one trained on the COCO dataset. The BODY_25 model outputs 25 points, the MPII model outputs 15 points, while the COCO model produces 18 points. Defaults to 'body_25'.
-        device (str, optional): Sets the backend to use for the neural network ('cpu' or 'gpu'). Defaults to 'gpu'.
-        threshold (float, optional): The normalized confidence threshold that decides whether we keep or discard a predicted point. Discarded points get substituted with (0, 0) in the output data. Defaults to 0.1.
-        downsampling_factor (int, optional): Decides how much we downsample the video before we pass it to the neural network. For example `downsampling_factor=4` means that the input to the network is one-fourth the resolution of the source video. Heaviver downsampling reduces rendering time but produces lower quality pose estimation. Defaults to 2.
+        model (str, optional): Pose model to use. ``'mediapipe'`` uses MediaPipe Pose (33
+            landmarks, model auto-downloaded on first use). ``'body_25'`` loads the OpenPose BODY_25 model
+            (25 keypoints), ``'mpi'`` loads the MPII model (15 keypoints), ``'coco'`` loads
+            the COCO model (18 keypoints). Defaults to 'body_25'.
+        device (str, optional): Sets the backend to use for the neural network ('cpu' or 'gpu').
+            Ignored when ``model='mediapipe'`` (MediaPipe always runs on CPU). Defaults to 'gpu'.
+        threshold (float, optional): The normalized confidence threshold that decides whether we
+            keep or discard a predicted point. Discarded points get substituted with (0, 0) in the
+            output data. Defaults to 0.1.
+        downsampling_factor (int, optional): Decides how much we downsample the video before we
+            pass it to the neural network. For example, ``downsampling_factor=4`` means that the input to the network is one-fourth the resolution of the source video. Heavier downsampling reduces rendering time but lowers pose estimation quality. Ignored when ``model='mediapipe'``. Defaults to 2.
         save_data (bool, optional): Whether we save the predicted pose data to a file. Defaults to True.
-        data_format (str, optional): Specifies format of pose-data. Accepted values are 'csv', 'tsv' and 'txt'. For multiple output formats, use list, eg. ['csv', 'txt']. Defaults to 'csv'.
-        save_video (bool, optional): Whether we save the video with the estimated pose overlaid on it. Defaults to True.
-        target_name_video (str, optional): Target output name for the video. Defaults to None (which assumes that the input filename with the suffix "_pose" should be used).
-        target_name_data (str, optional): Target output name for the data. Defaults to None (which assumes that the input filename with the suffix "_pose" should be used).
-        overwrite (bool, optional): Whether to allow overwriting existing files or to automatically increment target filenames to avoid overwriting. Defaults to False.
+        data_format (str, optional): Specifies format of pose-data. Accepted values are 'csv', 'tsv'
+            and 'txt'. For multiple output formats, use a list, e.g. ['csv', 'txt']. Defaults to 'csv'.
+        save_video (bool, optional): Whether we save the video with the estimated pose overlaid on it.
+            Defaults to True.
+        target_name_video (str, optional): Target output name for the video. Defaults to None (which
+            assumes that the input filename with the suffix "_pose" should be used).
+        target_name_data (str, optional): Target output name for the data. Defaults to None (which
+            assumes that the input filename with the suffix "_pose" should be used).
+        overwrite (bool, optional): Whether to allow overwriting existing files or to automatically
+            increment target filenames to avoid overwriting. Defaults to False.
 
     Returns:
         MgVideo: An MgVideo pointing to the output video.
     """
 
+    # --- MediaPipe backend ---------------------------------------------------
+    if model.lower() == 'mediapipe':
+        return _pose_mediapipe(
+            self,
+            threshold=threshold,
+            save_data=save_data,
+            data_format=data_format,
+            save_video=save_video,
+            target_name_video=target_name_video,
+            target_name_data=target_name_data,
+            overwrite=overwrite,
+        )
+    # -------------------------------------------------------------------------
+
     module_path = os.path.abspath(os.path.dirname(musicalgestures.__file__))
 
     if model.lower() == 'mpi':
@@ -367,6 +418,172 @@ def save_single_file(of, width, height, model, data, data_format, target_name_da
     return self
 
 
+def _pose_mediapipe(
+        self,
+        threshold=0.1,
+        save_data=True,
+        data_format='csv',
+        save_video=True,
+        target_name_video=None,
+        target_name_data=None,
+        overwrite=False):
+    """
+    Internal helper: run MediaPipe Pose on a video and render/save the output.
+    Called by :func:`pose` when ``model='mediapipe'``.
+    """
+    from musicalgestures._pose_estimator import MediaPipePoseEstimator, MEDIAPIPE_LANDMARK_NAMES
+
+    of, fex = os.path.splitext(self.filename)
+
+    if fex != '.avi':
+        if "as_avi" not in self.__dict__.keys():
+            file_as_avi = convert_to_avi(of + fex, overwrite=overwrite)
+            self.as_avi = musicalgestures.MgVideo(file_as_avi)
+        of, fex = self.as_avi.of, self.as_avi.fex
+        filename = of + fex
+    else:
+        filename = self.filename
+
+    pb = MgProgressbar(total=self.length, prefix='Rendering MediaPipe pose estimation:')
+
+    if save_video:
+        if target_name_video is None:
+            target_name_video = of + '_pose' + fex
+        else:
+            target_name_video = os.path.splitext(target_name_video)[0] + fex
+        if not overwrite:
+            target_name_video = generate_outfilename(target_name_video)
+
+    # Pipe video with FFmpeg for reading frame by frame
+    cmd = ['ffmpeg', '-y', '-i', filename]
+    process = ffmpeg_cmd(cmd, total_time=self.length, pipe='read')
+    video_out = None
+
+    ii = 0
+    data = []
+
+    estimator = MediaPipePoseEstimator()
+
+    while True:
+        out = process.stdout.read(self.width * self.height * 3)
+
+        if out == b'':
+            pb.progress(self.length)
+            break
+
+        frame = np.frombuffer(out, dtype=np.uint8).reshape([self.height, self.width, 3]).copy()
+
+        result = estimator.predict_frame(frame)
+        keypoints = result.keypoints  # shape (33, 3): x, y, visibility
+
+        # Collect data row: time + normalised (x, y) for every landmark
+        if save_data:
+            time_ms = frame2ms(ii, self.fps)
+            row = [time_ms]
+            for i in range(len(MEDIAPIPE_LANDMARK_NAMES)):
+                x, y, vis = keypoints[i]
+                if vis >= threshold:
+                    row += [float(x), float(y)]
+                else:
+                    row += [0.0, 0.0]
+            data.append(row)
+
+        # Draw skeleton connections
+        for (a, b) in MEDIAPIPE_POSE_CONNECTIONS:
+            xa, ya, va = keypoints[a]
+            xb, yb, vb = keypoints[b]
+            if va >= threshold and vb >= threshold:
+                pt_a = (int(xa * self.width), int(ya * self.height))
+                pt_b = (int(xb * self.width), int(yb * self.height))
+                cv2.line(frame, pt_a, pt_b, (0, 255, 255), 2, lineType=cv2.LINE_AA)
+
+        # Draw landmark circles
+        for i in range(len(MEDIAPIPE_LANDMARK_NAMES)):
+            x, y, vis = keypoints[i]
+            if vis >= threshold:
+                pt = (int(x * self.width), int(y * self.height))
+                cv2.circle(frame, pt, 4, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
+
+        if save_video:
+            if video_out is None:
+                cmd = ['ffmpeg', '-y', '-s', '{}x{}'.format(frame.shape[1], frame.shape[0]),
+                       '-r', str(self.fps), '-f', 'rawvideo', '-pix_fmt', 'bgr24',
+                       '-vcodec', 'rawvideo', '-i', '-', '-vcodec', 'libx264',
+                       '-pix_fmt', 'yuv420p', target_name_video]
+                video_out = ffmpeg_cmd(cmd, total_time=self.length, pipe='write')
+            video_out.stdin.write(frame.astype(np.uint8))
+
+        process.stdout.flush()
+        pb.progress(ii)
+        ii += 1
+
+    estimator.close()
+
+    if save_video:
+        video_out.stdin.close()
+        video_out.wait()
+        if self.has_audio:
+            source_audio = extract_wav(of + fex)
+            embed_audio_in_video(source_audio, target_name_video)
+            os.remove(source_audio)
+
+    process.terminate()
+
+    if save_data:
+        # Build column headers from landmark names
+        headers = ['Time']
+        for name in MEDIAPIPE_LANDMARK_NAMES:
+            headers.append(name.replace('_', ' ').title() + ' X')
+            headers.append(name.replace('_', ' ').title() + ' Y')
+        _save_pose_txt(of, data, headers, data_format, target_name_data, overwrite)
+
+    if save_video:
+        self.pose_video = musicalgestures.MgVideo(target_name_video, color=self.color, returned_by_process=True)
+        return self.pose_video
+    else:
+        return self
+
+
+def _save_pose_txt(of, data, headers, data_format, target_name_data, overwrite):
+    """Save pose data to one or more text files (csv / tsv / txt)."""
+
+    def _save_single(data_format):
+        ext = '.' + data_format.lower()
+        if target_name_data is None:
+            out_path = of + '_pose' + ext
+        else:
+            out_path = os.path.splitext(target_name_data)[0] + ext
+        if not overwrite:
+            out_path = generate_outfilename(out_path)
+
+        df = pd.DataFrame(data=data, columns=headers)
+
+        if data_format.lower() == 'csv':
+            df.to_csv(out_path, index=False)
+        elif data_format.lower() in ('tsv', 'txt'):
+            delimiter = '\t' if data_format.lower() == 'tsv' else ' '
+            with open(out_path, 'wb') as f:
+                head_str = delimiter.join(headers) + '\n'
+                f.write(head_str.encode())
+                fmt_list = ['%d'] + ['%.15f'] * (len(headers) - 1)
+                np.savetxt(f, df.values, delimiter=delimiter, fmt=fmt_list)
+        else:
+            print(f"Invalid data format: '{data_format}'.\nFalling back to '.csv'.")
+            _save_single('csv')
+
+    if isinstance(data_format, str):
+        _save_single(data_format)
+    elif isinstance(data_format, list):
+        valid = [f for f in data_format if f.lower() in ('csv', 'tsv', 'txt')]
+        if len(valid) != len(data_format):
+            invalid = [f for f in data_format if f.lower() not in ('csv', 'tsv', 'txt')]
+            print(f"Unsupported formats {invalid}.\nFalling back to '.csv'.")
+            _save_single('csv')
+        else:
+            for fmt in list(set(valid)):
+                _save_single(fmt)
+
+
 def download_model(modeltype):
     """
     Helper function to automatically download model (.caffemodel) files.
diff --git a/musicalgestures/_pose_estimator.py b/musicalgestures/_pose_estimator.py
index 6db6566..6c949fb 100644
--- a/musicalgestures/_pose_estimator.py
+++ b/musicalgestures/_pose_estimator.py
@@ -192,24 +192,25 @@ def __repr__(self) -> str:
 
 
 class MediaPipePoseEstimator(PoseEstimator):
-    """Pose estimator backed by Google MediaPipe Pose.
+    """Pose estimator backed by Google MediaPipe Pose (Tasks API).
 
-    Requires the optional ``mediapipe`` package::
+    Requires the optional ``mediapipe>=0.10`` package::
 
         pip install musicalgestures[pose]
 
+    The first time you use a given complexity level, the corresponding
+    ``.task`` model file (~8–28 MB) is downloaded from Google's model
+    storage and cached in ``musicalgestures/models/``.
+
     Parameters
     ----------
     model_complexity:
-        MediaPipe model complexity (0, 1, or 2). Higher = more accurate
-        but slower. Default: 1.
+        MediaPipe model complexity (0 = lite, 1 = full, 2 = heavy).
+        Higher values are more accurate but slower. Default: 1.
     min_detection_confidence:
         Minimum confidence for initial body detection. Default: 0.5.
     min_tracking_confidence:
         Minimum confidence for landmark tracking. Default: 0.5.
-    static_image_mode:
-        If *True*, treat every frame as a static image (no tracking).
-        Default: False.
 
     Examples
     --------
@@ -220,22 +221,77 @@ class MediaPipePoseEstimator(PoseEstimator):
     >>> result.keypoints.shape  # (33, 3)  # doctest: +SKIP
     """
 
+    # Model download URLs for each complexity level
+    _MODEL_URLS: dict[int, str] = {
+        0: (
+            "https://storage.googleapis.com/mediapipe-models/"
+            "pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task"
+        ),
+        1: (
+            "https://storage.googleapis.com/mediapipe-models/"
+            "pose_landmarker/pose_landmarker_full/float16/1/pose_landmarker_full.task"
+        ),
+        2: (
+            "https://storage.googleapis.com/mediapipe-models/"
+            "pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task"
+        ),
+    }
+    _MODEL_NAMES: dict[int, str] = {
+        0: "pose_landmarker_lite.task",
+        1: "pose_landmarker_full.task",
+        2: "pose_landmarker_heavy.task",
+    }
+
     def __init__(
         self,
         model_complexity: int = 1,
         min_detection_confidence: float = 0.5,
         min_tracking_confidence: float = 0.5,
-        static_image_mode: bool = False,
     ) -> None:
         super().__init__(model=PoseModel.MEDIAPIPE, device=PoseDevice.CPU)
         self.model_complexity = model_complexity
         self.min_detection_confidence = min_detection_confidence
         self.min_tracking_confidence = min_tracking_confidence
-        self.static_image_mode = static_image_mode
-        self._pose = None  # lazy init
+        self._landmarker = None  # lazy init
+
+    def _get_model_path(self) -> Path:
+        """Return path to the cached model file, downloading if necessary."""
+        import musicalgestures as mg
+
+        module_dir = Path(mg.__file__).parent
+        models_dir = module_dir / "models"
+        models_dir.mkdir(exist_ok=True)
+
+        complexity = self.model_complexity
+        if complexity not in self._MODEL_NAMES:
+            logger.warning(
+                "model_complexity %d is not valid (0-2); defaulting to 1.",
+                complexity,
+            )
+            complexity = 1
+
+        model_path = models_dir / self._MODEL_NAMES[complexity]
+        if model_path.exists():
+            return model_path
+
+        url = self._MODEL_URLS[complexity]
+        logger.info("Downloading MediaPipe model from %s …", url)
+        print(f"Downloading MediaPipe pose model ({self._MODEL_NAMES[complexity]}) …")
+        try:
+            import urllib.request
+
+            urllib.request.urlretrieve(url, model_path)
+            logger.info("Model saved to %s", model_path)
+        except Exception as exc:
+            raise MgDependencyError(
+                f"Failed to download MediaPipe pose model from {url}. "
+                "Please download it manually and place it at: "
+                f"{model_path}"
+            ) from exc
+        return model_path
 
     def _ensure_initialized(self) -> None:
-        if self._pose is not None:
+        if self._landmarker is not None:
             return
         try:
             import mediapipe as mp
@@ -244,13 +300,25 @@ def _ensure_initialized(self) -> None:
                 "mediapipe is required for MediaPipePoseEstimator. "
" "Install it with: pip install musicalgestures[pose]" ) from exc - self._pose = mp.solutions.pose.Pose( - static_image_mode=self.static_image_mode, - model_complexity=self.model_complexity, - min_detection_confidence=self.min_detection_confidence, + + model_path = self._get_model_path() + + BaseOptions = mp.tasks.BaseOptions + PoseLandmarker = mp.tasks.vision.PoseLandmarker + PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions + VisionRunningMode = mp.tasks.vision.RunningMode + + options = PoseLandmarkerOptions( + base_options=BaseOptions(model_asset_path=str(model_path)), + running_mode=VisionRunningMode.IMAGE, + min_pose_detection_confidence=self.min_detection_confidence, min_tracking_confidence=self.min_tracking_confidence, ) - logger.debug("MediaPipe Pose initialised (complexity=%d)", self.model_complexity) + self._landmarker = PoseLandmarker.create_from_options(options) + logger.debug( + "MediaPipe PoseLandmarker initialised (complexity=%d)", + self.model_complexity, + ) @property def landmark_names(self) -> list[str]: @@ -271,15 +339,17 @@ def predict_frame(self, frame: np.ndarray) -> PoseEstimatorResult: """ self._ensure_initialized() import cv2 + import mediapipe as mp rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - results = self._pose.process(rgb) + mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb) + detection_result = self._landmarker.detect(mp_image) n = len(MEDIAPIPE_LANDMARK_NAMES) keypoints = np.zeros((n, 3), dtype=float) - if results.pose_landmarks: - for i, lm in enumerate(results.pose_landmarks.landmark): + if detection_result.pose_landmarks: + for i, lm in enumerate(detection_result.pose_landmarks[0]): keypoints[i] = [lm.x, lm.y, lm.visibility] return PoseEstimatorResult( @@ -289,9 +359,9 @@ def predict_frame(self, frame: np.ndarray) -> PoseEstimatorResult: def close(self) -> None: """Release MediaPipe resources.""" - if self._pose is not None: - self._pose.close() - self._pose = None + if self._landmarker is not None: + self._landmarker.close() + self._landmarker = None def __del__(self) -> None: try: diff --git a/tests/test_new_features.py b/tests/test_new_features.py index 2924929..6d5153b 100644 --- a/tests/test_new_features.py +++ b/tests/test_new_features.py @@ -360,7 +360,7 @@ def load_module(self, name): from musicalgestures._pose_estimator import MediaPipePoseEstimator from musicalgestures._exceptions import MgDependencyError est = MediaPipePoseEstimator() - est._pose = None # ensure not initialized + est._landmarker = None # ensure not initialized with pytest.raises(MgDependencyError): est._ensure_initialized() finally: @@ -369,6 +369,126 @@ def load_module(self, name): sys.modules.update(mp_modules) +# --------------------------------------------------------------------------- +# Phase 3b – MediaPipe pose integration (_pose.py) +# --------------------------------------------------------------------------- + +class TestMediaPipePoseIntegration: + """Tests for the MediaPipe path in musicalgestures._pose.""" + + def test_pose_connections_defined(self): + """MEDIAPIPE_POSE_CONNECTIONS should be a non-empty list of 2-tuples.""" + from musicalgestures._pose import MEDIAPIPE_POSE_CONNECTIONS + assert isinstance(MEDIAPIPE_POSE_CONNECTIONS, list) + assert len(MEDIAPIPE_POSE_CONNECTIONS) > 0 + for a, b in MEDIAPIPE_POSE_CONNECTIONS: + assert isinstance(a, int) + assert isinstance(b, int) + assert 0 <= a <= 32 + assert 0 <= b <= 32 + + def test_save_pose_txt_csv(self, tmp_path): + """_save_pose_txt should write a valid CSV file.""" + from 
+        import pandas as pd
+
+        headers = ['Time', 'Nose X', 'Nose Y', 'Left Eye X', 'Left Eye Y']
+        data = [[0, 0.5, 0.3, 0.45, 0.28], [33, 0.51, 0.31, 0.46, 0.29]]
+        of = str(tmp_path / 'test_video')
+        _save_pose_txt(of, data, headers, 'csv', None, overwrite=True)
+        csv_path = tmp_path / 'test_video_pose.csv'
+        assert csv_path.exists()
+        df = pd.read_csv(csv_path)
+        assert list(df.columns) == headers
+        assert len(df) == 2
+
+    def test_save_pose_txt_tsv(self, tmp_path):
+        """_save_pose_txt should write a valid TSV file."""
+        from musicalgestures._pose import _save_pose_txt
+
+        headers = ['Time', 'Nose X', 'Nose Y']
+        data = [[0, 0.5, 0.3]]
+        of = str(tmp_path / 'test_video')
+        _save_pose_txt(of, data, headers, 'tsv', None, overwrite=True)
+        tsv_path = tmp_path / 'test_video_pose.tsv'
+        assert tsv_path.exists()
+
+    def test_save_pose_txt_txt(self, tmp_path):
+        """_save_pose_txt should write a valid TXT file."""
+        from musicalgestures._pose import _save_pose_txt
+
+        headers = ['Time', 'Nose X', 'Nose Y']
+        data = [[0, 0.5, 0.3]]
+        of = str(tmp_path / 'test_video')
+        _save_pose_txt(of, data, headers, 'txt', None, overwrite=True)
+        txt_path = tmp_path / 'test_video_pose.txt'
+        assert txt_path.exists()
+
+    def test_save_pose_txt_multiple_formats(self, tmp_path):
+        """_save_pose_txt with a list of formats should write all files."""
+        from musicalgestures._pose import _save_pose_txt
+
+        headers = ['Time', 'Nose X', 'Nose Y']
+        data = [[0, 0.5, 0.3]]
+        of = str(tmp_path / 'test_video')
+        _save_pose_txt(of, data, headers, ['csv', 'tsv'], None, overwrite=True)
+        assert (tmp_path / 'test_video_pose.csv').exists()
+        assert (tmp_path / 'test_video_pose.tsv').exists()
+
+    def test_save_pose_txt_custom_name(self, tmp_path):
+        """_save_pose_txt respects a custom target_name_data."""
+        from musicalgestures._pose import _save_pose_txt
+
+        headers = ['Time', 'Nose X', 'Nose Y']
+        data = [[0, 0.5, 0.3]]
+        of = str(tmp_path / 'test_video')
+        target = str(tmp_path / 'my_custom_output.csv')
+        _save_pose_txt(of, data, headers, 'csv', target, overwrite=True)
+        assert (tmp_path / 'my_custom_output.csv').exists()
+
+    def test_mediapipe_estimator_model_names(self):
+        """MediaPipePoseEstimator._MODEL_NAMES should map 0-2 to .task filenames."""
+        from musicalgestures._pose_estimator import MediaPipePoseEstimator
+        est = MediaPipePoseEstimator()
+        assert est._MODEL_NAMES[0].endswith('.task')
+        assert est._MODEL_NAMES[1].endswith('.task')
+        assert est._MODEL_NAMES[2].endswith('.task')
+
+    def test_mediapipe_estimator_model_urls(self):
+        """MediaPipePoseEstimator._MODEL_URLS should map 0-2 to HTTPS URLs."""
+        from musicalgestures._pose_estimator import MediaPipePoseEstimator
+        est = MediaPipePoseEstimator()
+        for complexity in (0, 1, 2):
+            url = est._MODEL_URLS[complexity]
+            assert url.startswith('https://')
+            assert '.task' in url
+
+    def test_mediapipe_estimator_close_noop(self):
+        """Calling close() on an uninitialised estimator should be a no-op."""
+        from musicalgestures._pose_estimator import MediaPipePoseEstimator
+        est = MediaPipePoseEstimator()
+        est.close()  # should not raise
+
+    def test_pose_function_routes_mediapipe(self, monkeypatch):
+        """pose() with model='mediapipe' should call _pose_mediapipe, not OpenPose."""
+        import musicalgestures._pose as pose_module
+        called_with = {}
+
+        def fake_pose_mediapipe(self, **kwargs):
+            called_with.update(kwargs)
+            return "sentinel"
+
+        monkeypatch.setattr(pose_module, "_pose_mediapipe", fake_pose_mediapipe)
+
+        class FakeMgVideo:
filename = "dummy.avi" + color = True + + result = pose_module.pose(FakeMgVideo(), model="mediapipe", save_video=False) + assert result == "sentinel" + assert called_with.get("save_video") is False + + # --------------------------------------------------------------------------- # Phase 4 – MgPipeline # ---------------------------------------------------------------------------