diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
index ddb65ea94..549a803e7 100644
--- a/docs/source/whats_new.rst
+++ b/docs/source/whats_new.rst
@@ -21,6 +21,7 @@ Enhancements
 ~~~~~~~~~~~~
 - Introduce a new logo for the MOABB library (:gh:`858` by `Pierre Guetschel`_ and community)
 - Better verbosity control for initialization of the library (:gh:`850` by `Bruno Aristimunha`_)
+- Improved error messages for dataset compatibility checks in evaluations: the message now gives the specific reason a dataset is incompatible (e.g., "dataset has only 1 session(s), but CrossSessionEvaluation requires at least 2 sessions") by `Bruno Aristimunha`_
 - Ability to join rows from the tables of MOABB predictive performance scores and detailed CodeCarbon compute profiling metrics by the column `codecarbon_task_name` in MOABB results and the column `task_name` in CodeCarbon results (:gh:`866` by `Ethan Davis`_).
 - Adding two c-VEP datasets: :class:`moabb.datasets.MartinezCagigal2023Checker` and :class:`moabb.datasets.MartinezCagigal2023Pary` by `Victor Martinez-Cagigal`_
 - Allow custom paradigms to have multiple scores for evaluations (:gh:`948` by `Ethan Davis`_)
diff --git a/moabb/evaluations/base.py b/moabb/evaluations/base.py
index 2ade65574..04ad0e1c1 100644
--- a/moabb/evaluations/base.py
+++ b/moabb/evaluations/base.py
@@ -168,8 +168,11 @@ def __init__(
                 )
                 rm.append(dataset)
             elif not valid_for_eval:
+                # Get the specific reason for the incompatibility
+                eval_type = self.__class__.__name__
+                reason = self._get_incompatibility_reason(dataset)
                 log.warning(
-                    f"{dataset} not compatible with evaluation. "
+                    f"{dataset} not compatible with {eval_type}: {reason}. "
                     "Removing this dataset from the list."
                 )
                 rm.append(dataset)
@@ -324,6 +327,25 @@ def is_valid(self, dataset):
             The dataset to verify.
         """
 
+    def _get_incompatibility_reason(self, dataset):
+        """Get a human-readable reason why the dataset is incompatible.
+
+        This method should be overridden by subclasses to provide
+        specific incompatibility reasons.
+
+        Parameters
+        ----------
+        dataset : dataset instance
+            The dataset to check.
+
+        Returns
+        -------
+        str
+            A human-readable reason for the incompatibility.
+
+        """
+        return "requirements not met"
+
     def _grid_search(self, param_grid, name, grid_clf, inner_cv):
         extra_params = {}
         if param_grid is not None:
diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py
index 9f7c53188..8d667f8d6 100644
--- a/moabb/evaluations/evaluations.py
+++ b/moabb/evaluations/evaluations.py
@@ -525,7 +525,10 @@ def evaluate(
         self, dataset, pipelines, param_grid, process_pipeline, postprocess_pipeline=None
     ):
         if not self.is_valid(dataset):
-            raise AssertionError("Dataset is not appropriate for evaluation")
+            reason = self._get_incompatibility_reason(dataset)
+            raise AssertionError(
+                f"Dataset '{dataset.code}' is not appropriate for {self.__class__.__name__}: {reason}"
+            )
         # Progressbar at subject level
         for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-CrossSession"):
             # check if we already have result for this subject/pipeline
@@ -638,6 +641,16 @@ def evaluate(
     def is_valid(self, dataset):
         return dataset.n_sessions > 1
 
+    def _get_incompatibility_reason(self, dataset):
+        """Get the specific reason for dataset incompatibility."""
+        n_sessions = dataset.n_sessions
+        if n_sessions <= 1:
+            return (
+                f"dataset has only {n_sessions} session(s), "
+                f"but {self.__class__.__name__} requires at least 2 sessions"
+            )
+        return "requirements not met"
+
 
 class CrossSubjectEvaluation(BaseEvaluation):
     """Cross-subject evaluation performance.
@@ -692,7 +705,10 @@ def evaluate(
         self, dataset, pipelines, param_grid, process_pipeline, postprocess_pipeline=None
     ):
         if not self.is_valid(dataset):
-            raise AssertionError("Dataset is not appropriate for evaluation")
+            reason = self._get_incompatibility_reason(dataset)
+            raise AssertionError(
+                f"Dataset '{dataset.code}' is not appropriate for {self.__class__.__name__}: {reason}"
+            )
         # this is a bit awkward, but we need to check if at least one pipe
         # have to be run before loading the data. If at least one pipeline
         # need to be run, we have to load all the data.
@@ -828,3 +844,13 @@ def evaluate(
 
     def is_valid(self, dataset):
         return len(dataset.subject_list) > 1
+
+    def _get_incompatibility_reason(self, dataset):
+        """Get the specific reason for dataset incompatibility."""
+        n_subjects = len(dataset.subject_list)
+        if n_subjects <= 1:
+            return (
+                f"dataset has only {n_subjects} subject(s), "
+                f"but {self.__class__.__name__} requires at least 2 subjects"
+            )
+        return "requirements not met"
diff --git a/moabb/evaluations/splitters.py b/moabb/evaluations/splitters.py
index 2f35bb63d..760594a36 100644
--- a/moabb/evaluations/splitters.py
+++ b/moabb/evaluations/splitters.py
@@ -359,7 +359,7 @@ def split(self, y, metadata):
 
             if len(sessions) <= 1:
                 log.info(
-                    f"Skipping subject {subject}: Only one session available"
+                    f"Skipping subject {subject}: Only one session available. "
                     f"Cross-session evaluation requires at least two sessions."
                 )
                 continue  # Skip subjects with only one session
diff --git a/moabb/tests/test_evaluations.py b/moabb/tests/test_evaluations.py
index 6beb73edb..1901a36dc 100644
--- a/moabb/tests/test_evaluations.py
+++ b/moabb/tests/test_evaluations.py
@@ -382,6 +382,17 @@ def test_compatible_dataset(self):
         ds = FakeDataset(["left_hand", "right_hand"], n_sessions=2)
         assert self.eval.is_valid(dataset=ds)
 
+    def test_incompatibility_error_message(self):
+        """Test that the incompatibility error message is clear and informative."""
+        ds = FakeDataset(["left_hand", "right_hand"], n_sessions=1)
+        # The error message should name the evaluation and give the specific reason
+        with pytest.raises(AssertionError) as exc_info:
+            list(self.eval.evaluate(ds, pipelines, None, None))
+        error_msg = str(exc_info.value)
+        assert "CrossSessionEvaluation" in error_msg
+        assert "1 session" in error_msg
+        assert "requires at least 2 sessions" in error_msg
+
 
 class UtilEvaluation:
     def test_save_model_cv(self):
diff --git a/moabb/tests/test_verbose.py b/moabb/tests/test_verbose.py
index aae29cbaf..2d7151f01 100644
--- a/moabb/tests/test_verbose.py
+++ b/moabb/tests/test_verbose.py
@@ -19,8 +19,9 @@ def test_verbose_warning(caplog):
     with caplog.at_level(logging.WARNING):
         CrossSessionEvaluation(paradigm=paradigm, datasets=[dataset])
 
-    # Check if warning was logged
-    assert "not compatible with evaluation" in caplog.text
+    # Check if the warning was logged with the specific incompatibility reason
+    assert "not compatible with CrossSessionEvaluation" in caplog.text
+    assert "requires at least 2 sessions" in caplog.text
 
 
 def test_verbose_error_suppression(caplog):
@@ -35,7 +36,7 @@
         CrossSessionEvaluation(paradigm=paradigm, datasets=[dataset], verbose="ERROR")
 
     # Check if warning was suppressed
-    assert "not compatible with evaluation" not in caplog.text
+    assert "not compatible with CrossSessionEvaluation" not in caplog.text
 
 
 def test_verbose_false_warning(caplog):
@@ -50,5 +51,6 @@
     ):  # Set to INFO to see if behavior is consistent
         CrossSessionEvaluation(paradigm=paradigm, datasets=[dataset], verbose=False)
 
-    # Check if warning was logged (since verbose=False -> WARNING)
-    assert "not compatible with evaluation" in caplog.text
+    # Check if the warning was logged with the new specific message (since verbose=False -> WARNING)
+    assert "not compatible with CrossSessionEvaluation" in caplog.text
+    assert "requires at least 2 sessions" in caplog.text