From 419d0cb425b72b36cb7477d2e6453f72eb8246ba Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Mar 2026 09:02:34 +0100 Subject: [PATCH 1/7] Do not raise an exception if a facet is missing --- esmvalcore/io/local.py | 50 ++++++++++++++++++++---- tests/unit/io/local/test_replace_tags.py | 24 +++++++++--- 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/esmvalcore/io/local.py b/esmvalcore/io/local.py index aad0d7dccf..8df2096ea1 100644 --- a/esmvalcore/io/local.py +++ b/esmvalcore/io/local.py @@ -58,11 +58,10 @@ from netCDF4 import Dataset import esmvalcore.io.protocol -from esmvalcore.exceptions import RecipeError from esmvalcore.iris_helpers import ignore_warnings_context if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Collection, Iterable from netCDF4 import Variable @@ -421,6 +420,30 @@ def _select_files( return selection +class MissingFacetError(KeyError): + """Error raised when a facet required for filling the template is missing.""" + + +def format_collection(collection: Collection[Any]) -> str: + """Format a collection of items as a string for use in messages. + + Parameters + ---------- + collection: + The collection of items to format. + + Returns + ------- + : + The formatted string. + """ + items = [f"'{item}'" for item in sorted(collection)] + if len(items) > 1: + items[-1] = f"and {items[-1]}" + txt = ", ".join(items) + return f"s {txt}" if len(collection) > 1 else f" {txt}" + + def _replace_tags( paths: str | list[str], variable: Facets, @@ -446,6 +469,7 @@ def _replace_tags( tlist.add("sub_experiment") pathset = new_paths + failed = set() for original_tag in tlist: tag, _, _ = _get_caps_options(original_tag) @@ -454,12 +478,17 @@ def _replace_tags( elif tag == "version": replacewith = "*" else: - msg = ( - f"Dataset key '{tag}' must be specified for {variable}, check " - f"your recipe entry and/or extra facet file(s)" - ) - raise RecipeError(msg) + failed.add(tag) + continue pathset = _replace_tag(pathset, original_tag, replacewith) + if failed: + msg = ( + f"Unable to complete path{format_collection(pathset)} because " + f"the facet{format_collection(failed)}" + + (" has" if len(failed) == 1 else " have") + + " not been specified." + ) + raise MissingFacetError(msg) return [Path(p) for p in pathset] @@ -566,7 +595,12 @@ def find_data(self, **facets: FacetValue) -> list[LocalFile]: if "original_short_name" in facets: facets["short_name"] = facets["original_short_name"] - globs = self._get_glob_patterns(**facets) + try: + globs = self._get_glob_patterns(**facets) + except MissingFacetError as exc: + self.debug_info = str(exc) + logger.debug(self.debug_info) + return [] self.debug_info = "No files found matching glob pattern " + "\n".join( str(g) for g in globs ) diff --git a/tests/unit/io/local/test_replace_tags.py b/tests/unit/io/local/test_replace_tags.py index 8adb8a83dc..2f1d3cf11f 100644 --- a/tests/unit/io/local/test_replace_tags.py +++ b/tests/unit/io/local/test_replace_tags.py @@ -1,11 +1,11 @@ """Tests for `_replace_tags` in `esmvalcore.io.local`.""" +import re from pathlib import Path import pytest -from esmvalcore.exceptions import RecipeError -from esmvalcore.io.local import _replace_tags +from esmvalcore.io.local import MissingFacetError, _replace_tags VARIABLE = { "project": "CMIP6", @@ -58,13 +58,27 @@ def test_replace_tags_with_caps(): def test_replace_tags_missing_facet(): - """Check that a RecipeError is raised if a required facet is missing.""" + """Check that a MissingFacetError is raised if a required facet is missing.""" paths = ["{short_name}_{missing}_*.nc"] variable = {"short_name": "tas"} - with pytest.raises(RecipeError) as exc: + expected_message = ( + "Unable to complete path 'tas_{missing}_*.nc' because the facet " + "'missing' has not been specified." + ) + with pytest.raises(MissingFacetError, match=re.escape(expected_message)): _replace_tags(paths, variable) - assert "Dataset key 'missing' must be specified" in exc.value.message + +def test_replace_tags_missing_facets(): + """Check that a MissingFacetError is raised if multiple facets are missing.""" + paths = ["{short_name}_{missing}_{other}_*.nc"] + variable = {"short_name": "tas"} + expected_message = ( + "Unable to complete path 'tas_{missing}_{other}_*.nc' because the facets " + "'missing', and 'other' have not been specified." + ) + with pytest.raises(MissingFacetError, match=re.escape(expected_message)): + _replace_tags(paths, variable) def test_replace_tags_list_of_str(): From 3b30e7b9779ffb7f35309033714f4bfa906c50b5 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Mar 2026 09:25:14 +0100 Subject: [PATCH 2/7] Improve error message and add another test --- esmvalcore/io/local.py | 3 +-- tests/integration/io/test_local.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/esmvalcore/io/local.py b/esmvalcore/io/local.py index 8df2096ea1..384070c336 100644 --- a/esmvalcore/io/local.py +++ b/esmvalcore/io/local.py @@ -598,8 +598,7 @@ def find_data(self, **facets: FacetValue) -> list[LocalFile]: try: globs = self._get_glob_patterns(**facets) except MissingFacetError as exc: - self.debug_info = str(exc) - logger.debug(self.debug_info) + self.debug_info = exc.args[0] return [] self.debug_info = "No files found matching glob pattern " + "\n".join( str(g) for g in globs diff --git a/tests/integration/io/test_local.py b/tests/integration/io/test_local.py index 71b114f35f..2fd2b36fb0 100644 --- a/tests/integration/io/test_local.py +++ b/tests/integration/io/test_local.py @@ -216,6 +216,31 @@ def test_find_data(root, cfg): assert str(pattern) in data_source.debug_info +def test_find_data_facet_missing() -> None: + """Test that a MissingFacetError is raised if a required facet is missing.""" + data_source = LocalDataSource( + name="test-data-source", + project="CMIP6", + rootpath=Path("/data/cmip6"), + priority=1, + dirname_template="{dataset}/{exp}/{ensemble}", + filename_template="{short_name}.nc", + ) + facets = { + "short_name": "tas", + "dataset": "test-dataset", + "exp": ["historical", "ssp585"], + } + expected_message = ( + "Unable to complete paths 'test-dataset/historical/{ensemble}', and " + "'test-dataset/ssp585/{ensemble}' because the facet 'ensemble' has " + "not been specified." + ) + files = data_source.find_data(**facets) + assert not files + assert data_source.debug_info == expected_message + + def test_select_invalid_drs_structure(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr(esmvalcore.cmor.table, "CMOR_TABLES", {}) monkeypatch.setitem( From c8d922566a0d6a637009263115aa2f152a5ba2d3 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Mar 2026 12:22:03 +0100 Subject: [PATCH 3/7] Improve debug info message --- esmvalcore/io/local.py | 26 ++++++++++++------------ tests/integration/io/test_local.py | 2 +- tests/unit/io/local/test_replace_tags.py | 16 +++++++++------ 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/esmvalcore/io/local.py b/esmvalcore/io/local.py index 384070c336..b0ae5b81d5 100644 --- a/esmvalcore/io/local.py +++ b/esmvalcore/io/local.py @@ -61,7 +61,7 @@ from esmvalcore.iris_helpers import ignore_warnings_context if TYPE_CHECKING: - from collections.abc import Collection, Iterable + from collections.abc import Iterable from netCDF4 import Variable @@ -420,28 +420,28 @@ def _select_files( return selection -class MissingFacetError(KeyError): +class _MissingFacetError(KeyError): """Error raised when a facet required for filling the template is missing.""" -def format_collection(collection: Collection[Any]) -> str: - """Format a collection of items as a string for use in messages. +def _format_iterable(iterable: Iterable[Any]) -> str: + """Format an iterable as a string for use in messages. Parameters ---------- - collection: - The collection of items to format. + iterable: + The iterable to format. Returns ------- : The formatted string. """ - items = [f"'{item}'" for item in sorted(collection)] + items = [f"'{item}'" for item in sorted(iterable)] if len(items) > 1: items[-1] = f"and {items[-1]}" - txt = ", ".join(items) - return f"s {txt}" if len(collection) > 1 else f" {txt}" + txt = " ".join(items) if len(items) == 2 else ", ".join(items) + return f"s {txt}" if len(items) > 1 else f" {txt}" def _replace_tags( @@ -483,12 +483,12 @@ def _replace_tags( pathset = _replace_tag(pathset, original_tag, replacewith) if failed: msg = ( - f"Unable to complete path{format_collection(pathset)} because " - f"the facet{format_collection(failed)}" + f"Unable to complete path{_format_iterable(pathset)} because " + f"the facet{_format_iterable(failed)}" + (" has" if len(failed) == 1 else " have") + " not been specified." ) - raise MissingFacetError(msg) + raise _MissingFacetError(msg) return [Path(p) for p in pathset] @@ -597,7 +597,7 @@ def find_data(self, **facets: FacetValue) -> list[LocalFile]: try: globs = self._get_glob_patterns(**facets) - except MissingFacetError as exc: + except _MissingFacetError as exc: self.debug_info = exc.args[0] return [] self.debug_info = "No files found matching glob pattern " + "\n".join( diff --git a/tests/integration/io/test_local.py b/tests/integration/io/test_local.py index 2fd2b36fb0..c971d0fbb0 100644 --- a/tests/integration/io/test_local.py +++ b/tests/integration/io/test_local.py @@ -232,7 +232,7 @@ def test_find_data_facet_missing() -> None: "exp": ["historical", "ssp585"], } expected_message = ( - "Unable to complete paths 'test-dataset/historical/{ensemble}', and " + "Unable to complete paths 'test-dataset/historical/{ensemble}' and " "'test-dataset/ssp585/{ensemble}' because the facet 'ensemble' has " "not been specified." ) diff --git a/tests/unit/io/local/test_replace_tags.py b/tests/unit/io/local/test_replace_tags.py index 2f1d3cf11f..b2f4ff8056 100644 --- a/tests/unit/io/local/test_replace_tags.py +++ b/tests/unit/io/local/test_replace_tags.py @@ -5,7 +5,10 @@ import pytest -from esmvalcore.io.local import MissingFacetError, _replace_tags +from esmvalcore.io.local import ( + _MissingFacetError, + _replace_tags, +) VARIABLE = { "project": "CMIP6", @@ -65,19 +68,20 @@ def test_replace_tags_missing_facet(): "Unable to complete path 'tas_{missing}_*.nc' because the facet " "'missing' has not been specified." ) - with pytest.raises(MissingFacetError, match=re.escape(expected_message)): + with pytest.raises(_MissingFacetError, match=re.escape(expected_message)): _replace_tags(paths, variable) def test_replace_tags_missing_facets(): """Check that a MissingFacetError is raised if multiple facets are missing.""" - paths = ["{short_name}_{missing}_{other}_*.nc"] + paths = ["{missing1}_{short_name}_{missing2}_{missing3}_*.nc"] variable = {"short_name": "tas"} expected_message = ( - "Unable to complete path 'tas_{missing}_{other}_*.nc' because the facets " - "'missing', and 'other' have not been specified." + "Unable to complete path '{missing1}_tas_{missing2}_{missing3}_*.nc' " + "because the facets 'missing1', 'missing2', and 'missing3' have not " + "been specified." ) - with pytest.raises(MissingFacetError, match=re.escape(expected_message)): + with pytest.raises(_MissingFacetError, match=re.escape(expected_message)): _replace_tags(paths, variable) From c64c373a51cd60aea22561bab26b6bc1ee4be6ef Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Mar 2026 14:22:02 +0100 Subject: [PATCH 4/7] Handle case of missing facets with legacy function _get_output_file --- esmvalcore/local.py | 7 ++++++- tests/integration/io/test_local.py | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/esmvalcore/local.py b/esmvalcore/local.py index 97c3822412..88587a4e9e 100644 --- a/esmvalcore/local.py +++ b/esmvalcore/local.py @@ -21,10 +21,12 @@ get_project_config, load_config_developer, ) +from esmvalcore.exceptions import RecipeError from esmvalcore.io.local import ( LocalDataSource, LocalFile, _filter_versions_called_latest, + _MissingFacetError, _replace_tags, _select_latest_version, ) @@ -300,7 +302,10 @@ def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path: if isinstance(variable.get("exp"), (list, tuple)): variable = dict(variable) variable["exp"] = "-".join(variable["exp"]) - outfile = _replace_tags(cfg["output_file"], variable)[0] + try: + outfile = _replace_tags(cfg["output_file"], variable)[0] + except _MissingFacetError as exc: + raise RecipeError(exc.args[0]) from exc if "timerange" in variable: timerange = variable["timerange"].replace("/", "-") outfile = Path(f"{outfile}_{timerange}") diff --git a/tests/integration/io/test_local.py b/tests/integration/io/test_local.py index c971d0fbb0..080afed5c7 100644 --- a/tests/integration/io/test_local.py +++ b/tests/integration/io/test_local.py @@ -14,6 +14,7 @@ import esmvalcore.config._config import esmvalcore.local from esmvalcore.config import CFG +from esmvalcore.exceptions import RecipeError from esmvalcore.io.local import ( LocalDataSource, LocalFile, @@ -83,6 +84,30 @@ def test_get_output_file(monkeypatch, cfg): assert output_file == expected +def test_get_output_file_missing_facets( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test that a RecipeError is raised if a required facet is missing.""" + monkeypatch.setattr(esmvalcore.cmor.table, "CMOR_TABLES", {}) + monkeypatch.setitem( + CFG, + "config_developer_file", + Path(esmvalcore.__path__[0], "config-developer.yml"), + ) + facets = { + "project": "CMIP6", + "mip": "Amon", + "short_name": "tas", + } + expected_message = ( + "Unable to complete path 'CMIP6_{dataset}_Amon_{exp}_{ensemble}_tas" + "_{grid}' because the facets 'dataset', 'ensemble', 'exp', and 'grid' " + "have not been specified." + ) + with pytest.raises(RecipeError, match=expected_message): + _get_output_file(facets, Path("/preproc/dir")) + + @pytest.mark.parametrize("cfg", CONFIG["get_output_file"]) def test_get_output_file_no_config_developer(monkeypatch, cfg): """Test getting output name for preprocessed files.""" From 3ddc1e44987279affeb1e7ebe8dbd3d2915c3864 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Fri, 20 Mar 2026 10:15:41 +0100 Subject: [PATCH 5/7] Avoid raising _MissingFacetError from other places --- esmvalcore/cmor/_fixes/icon/_base_fixes.py | 16 +++++++--------- esmvalcore/local.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/esmvalcore/cmor/_fixes/icon/_base_fixes.py b/esmvalcore/cmor/_fixes/icon/_base_fixes.py index 124b56de5d..f993fbd671 100644 --- a/esmvalcore/cmor/_fixes/icon/_base_fixes.py +++ b/esmvalcore/cmor/_fixes/icon/_base_fixes.py @@ -2,6 +2,7 @@ from __future__ import annotations +import copy import logging import os import shutil @@ -326,17 +327,14 @@ def _get_grid_from_cube_attr(self, cube: Cube) -> Cube: def _get_grid_from_rootpath(self, grid_name: str) -> CubeList | None: """Try to get grid from the ICON rootpath.""" - glob_patterns: list[Path] = [] for data_source in _get_data_sources(self.session, "ICON"): # type: ignore[arg-type] if isinstance(data_source, esmvalcore.io.local.LocalDataSource): - glob_patterns.extend( - data_source._get_glob_patterns(**self.extra_facets), # noqa: SLF001 - ) - possible_grid_paths = [d.parent / grid_name for d in glob_patterns] - for grid_path in possible_grid_paths: - if grid_path.is_file(): - logger.debug("Using ICON grid file '%s'", grid_path) - return self._load_cubes(grid_path) + ds = copy.deepcopy(data_source) + ds.filename_template = grid_name + files = ds.find_data(**self.extra_facets) + if files: + logger.debug("Using ICON grid file '%s'", files[0]) + return files[0].to_iris() return None def _get_downloaded_grid(self, grid_url: str, grid_name: str) -> CubeList: diff --git a/esmvalcore/local.py b/esmvalcore/local.py index 88587a4e9e..f4b8ede78b 100644 --- a/esmvalcore/local.py +++ b/esmvalcore/local.py @@ -164,7 +164,10 @@ def regex_pattern(self) -> str: def get_glob_patterns(self, **facets: FacetValue) -> list[Path]: """Compose the globs that will be used to look for files.""" - return self._get_glob_patterns(**facets) + try: + return self._get_glob_patterns(**facets) + except _MissingFacetError as exc: + raise RecipeError(exc.args[0]) from exc def path2facets(self, path: Path, add_timerange: bool) -> dict[str, str]: """Extract facets from path.""" @@ -273,7 +276,10 @@ def find_files( if debug: globs = [] for data_source in data_sources: - globs.extend(data_source._get_glob_patterns(**facets)) # noqa: SLF001 + try: + globs.extend(data_source._get_glob_patterns(**facets)) # noqa: SLF001 + except _MissingFacetError as exc: + raise RecipeError(exc.args[0]) from exc return files, sorted(globs) return files From ef0d328c83c8caaaceb90eed203590cf8541c929 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Fri, 20 Mar 2026 10:31:50 +0100 Subject: [PATCH 6/7] Add test --- tests/integration/io/test_local.py | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/integration/io/test_local.py b/tests/integration/io/test_local.py index 080afed5c7..91717759d5 100644 --- a/tests/integration/io/test_local.py +++ b/tests/integration/io/test_local.py @@ -4,7 +4,9 @@ import os import pprint +import re from pathlib import Path +from typing import TYPE_CHECKING import pytest import yaml @@ -22,6 +24,9 @@ ) from esmvalcore.local import _get_output_file, _select_drs, find_files +if TYPE_CHECKING: + import pytest_mock + # Load test configuration with open( os.path.join(os.path.dirname(__file__), "data_finder.yml"), @@ -173,6 +178,34 @@ def test_find_files(monkeypatch, root, cfg, mocker): esmvalcore.local._ensure_config_developer_drs.assert_called_once() +def test_find_files_missing_facets( + monkeypatch: pytest.MonkeyPatch, + mocker: pytest_mock.MockerFixture, +) -> None: + """Test that a RecipeError is raised if a required facet is missing.""" + mocker.patch.object(esmvalcore.local, "_ensure_config_developer_drs") + monkeypatch.setattr(esmvalcore.cmor.table, "CMOR_TABLES", {}) + monkeypatch.setitem( + CFG, + "config_developer_file", + Path(esmvalcore.__path__[0], "config-developer.yml"), + ) + monkeypatch.setitem(CFG, "drs", {"CMIP6": "default"}) + monkeypatch.setitem(CFG, "rootpath", {"CMIP6": "/data/cmip6"}) + facets = { + "project": "CMIP6", + "mip": "Amon", + "short_name": "tas", + } + expected_message = ( + "Unable to complete path 'tas_Amon_{dataset}_{exp}_{ensemble}_{grid}*." + "nc' because the facets 'dataset', 'ensemble', 'exp', and 'grid' have " + "not been specified." + ) + with pytest.raises(RecipeError, match=re.escape(expected_message)): + find_files(debug=True, **facets) + + def test_find_files_with_facets(monkeypatch, root): """Test that a LocalFile with populated `facets` is returned.""" for cfg in CONFIG["get_input_filelist"]: From dfc2076e5beb7ab6296038a51d725aa841756044 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Fri, 20 Mar 2026 10:39:41 +0100 Subject: [PATCH 7/7] Add another test --- tests/unit/test_local.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/unit/test_local.py diff --git a/tests/unit/test_local.py b/tests/unit/test_local.py new file mode 100644 index 0000000000..656624bb27 --- /dev/null +++ b/tests/unit/test_local.py @@ -0,0 +1,29 @@ +"""Tests for the (deprecated) esmvalcore.local module.""" + +from pathlib import Path + +import pytest + +from esmvalcore.exceptions import RecipeError +from esmvalcore.local import DataSource + + +def test_get_glob_patterns_missing_facets() -> None: + """Test that get_glob_patterns raises when required facets are missing.""" + local_data_source = DataSource( + name="test", + project="test", + priority=1, + rootpath=Path("/climate_data"), + dirname_template="{dataset}", + filename_template="{short_name}*nc", + ) + facets = { + "short_name": "tas", + } + expected_message = ( + "Unable to complete path '{dataset}' because the facet 'dataset' has " + "not been specified." + ) + with pytest.raises(RecipeError, match=expected_message): + local_data_source.get_glob_patterns(**facets)