From ccd7f2337c2ded6f42b30a6cdb49d915bc143c78 Mon Sep 17 00:00:00 2001 From: Dhinakaran Suriyah Date: Tue, 24 Feb 2026 17:27:22 +0100 Subject: [PATCH 1/6] solving the issue #743, cube:dimension and fallback --- openeo/metadata.py | 191 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 170 insertions(+), 21 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 49edef428..9cb74a26b 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -35,8 +35,6 @@ class DimensionAlreadyExistsException(MetadataException): # TODO: make these dimension classes immutable data classes -# TODO: align better with STAC datacube extension -# TODO: align/adapt/integrate with pystac's datacube extension implementation? class Dimension: """Base class for dimensions.""" @@ -71,7 +69,6 @@ def rename_labels(self, target, source) -> Dimension: class SpatialDimension(Dimension): # TODO: align better with STAC datacube extension: e.g. support "axis" (x or y) - DEFAULT_CRS = 4326 def __init__( @@ -678,31 +675,161 @@ def __str__(self) -> str: def metadata_from_stac(url: str) -> CubeMetadata: """ Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` - :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection + :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. + Philosophy: + - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). + - Otherwise: apply openEO-style defaults (x, y, t) and optionally bands if discovered. """ stac_object = pystac.read_file(href=url) - bands = _StacMetadataParser().bands_from_stac_object(stac_object) + parser = _StacMetadataParser() + bands = parser.bands_from_stac_object(stac_object) + + # aligning better with STAC datacube extension + def _cube_dimensions_dict(obj: pystac.STACObject) -> dict: + """ + Return raw cube:dimensions dict from a Collection/Item, or {}. + """ + if isinstance(obj, pystac.Item): + return obj.properties.get("cube:dimensions", {}) or {} + if isinstance(obj, pystac.Collection): + return obj.extra_fields.get("cube:dimensions", {}) or {} + return {} + + # Standardize fallback temporal dimension name to "t" + # (openEO convention). We don't try to guess backend-specific naming here; we only infer temporal extent + # From Collection extent or Item datetime/start/end when available. + def _infer_temporal_extent(obj: pystac.STACObject) -> list: + """ + Best-effort temporal extent: + - Collection: extent.temporal interval + - Item: datetime or start/end + """ + if isinstance(obj, pystac.Collection) and obj.extent and obj.extent.temporal: + interval = obj.extent.temporal.intervals[0] + return [Rfc3339(propagate_none=True).normalize(d) for d in interval] + + if isinstance(obj, pystac.Item): + props = getattr(obj, "properties", {}) or {} + dt_ = props.get("datetime") + if dt_: + norm = Rfc3339(propagate_none=True).normalize(dt_) + return [norm, norm] + start = props.get("start_datetime") + end = props.get("end_datetime") + if start or end: + return [ + Rfc3339(propagate_none=True).normalize(start), + Rfc3339(propagate_none=True).normalize(end), + ] + + return [None, None] - # At least assume there are spatial dimensions - # TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions? - dimensions = [ - SpatialDimension(name="x", extent=[None, None]), - SpatialDimension(name="y", extent=[None, None]), - ] + def _safe_extent_from_pystac_cube_dim(dim) -> list: + """ + PySTAC cube dimension wrapper may raise if 'extent' is missing. + Also, depending on serialization/version, extent might live in extra_fields. + """ + try: + ext = dim.extent + except Exception: + ext = None - # TODO #743: conditionally include band dimension when there was actual indication of band metadata? - band_dimension = BandDimension(name="bands", bands=bands) - dimensions.append(band_dimension) + if not ext: + extra = getattr(dim, "extra_fields", {}) or {} + ext = extra.get("extent") + + return ext or [None, None] + + def _parse_cube_dimensions_from_pystac_extension(obj: pystac.STACObject) -> list: + """ + Parse dimensions from PySTAC's cube extension wrapper (when present). + Important: PySTAC DimensionType only has SPATIAL + TEMPORAL. + Everything else is treated as band-like. + """ + dims = [] + # Iterate in declared order (dict insertion order preserved in Python 3.7+) + for name, d in obj.ext.cube.dimensions.items(): + dt = getattr(d, "dim_type", None) - # TODO: is it possible to derive the actual name of temporal dimension that the backend will use? - temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object) - if temporal_dimension: - dimensions.append(temporal_dimension) + if dt == pystac.extensions.datacube.DimensionType.SPATIAL: + dims.append(SpatialDimension(name=name, extent=_safe_extent_from_pystac_cube_dim(d))) - metadata = CubeMetadata(dimensions=dimensions) - return metadata + elif dt == pystac.extensions.datacube.DimensionType.TEMPORAL: + dims.append(TemporalDimension(name=name, extent=_safe_extent_from_pystac_cube_dim(d))) + + else: + # No SPECTRAL in PySTAC DimensionType: treat as band-like dimension + dims.append(BandDimension(name=name, bands=bands)) + + return dims + + def _parse_cube_dimensions_from_raw_dict(obj: pystac.STACObject) -> list: + """ + Parse dimensions from raw cube:dimensions dict. + Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias). + """ + dims = [] + cube_dims = _cube_dimensions_dict(obj) + + for name, d in cube_dims.items(): + if not isinstance(d, dict): + continue + + t = d.get("type") + extent = d.get("extent", [None, None]) + + if t == "spatial": + dims.append(SpatialDimension(name=name, extent=extent)) + + elif t == "temporal": + dims.append(TemporalDimension(name=name, extent=extent)) + + elif t in ("bands", "spectral"): + dims.append(BandDimension(name=name, bands=bands)) + + else: + # Preserve unknown dimension types as generic Dimension + dims.append(Dimension(name=name, type=t)) + + return dims + + # - If cube:dimensions is present, we do NOT assume spatial dims; we parse declared dimensions in order. + # - Only when cube:dimensions is missing do we apply openEO-style defaults (x/y/t). + # Resolving #743 + cube_dims_raw = _cube_dimensions_dict(stac_object) + cube_dims_present = isinstance(cube_dims_raw, dict) and len(cube_dims_raw) > 0 + + dimensions: List[Dimension] = [] + + if cube_dims_present: + # Prefer PySTAC cube extension wrapper when available (more structured), + # but fall back to raw dict parsing if wrapper not present. + if ( + _PYSTAC_1_9_EXTENSION_INTERFACE + and getattr(stac_object, "ext", None) is not None + and stac_object.ext.has("cube") + and hasattr(stac_object.ext, "cube") + ): + dimensions = _parse_cube_dimensions_from_pystac_extension(stac_object) + else: + dimensions = _parse_cube_dimensions_from_raw_dict(stac_object) + + else: + # No cube:dimensions: openEO-style defaults. + dimensions = [ + SpatialDimension(name="x", extent=[None, None]), + SpatialDimension(name="y", extent=[None, None]), + TemporalDimension(name="t", extent=_infer_temporal_extent(stac_object)), + ] + # Only include bands if STAC provided band metadata. + # Handling #743 + # only add BandDimension in fallback mode when STAC actually provided band metadata (i.e., `bands` is non-empty). + if bands: + dimensions.append(BandDimension(name="bands", bands=bands)) + + return CubeMetadata(dimensions=dimensions) # Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9) # TODO: remove this once support for Python 3.7 and 3.8 is dropped @@ -780,6 +907,17 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD # with dimension name "t" (openEO API recommendation). extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] return TemporalDimension(name="t", extent=extent) + elif isinstance(stac_obj, pystac.Item): + props = getattr(stac_obj, "properties", {}) or {} + dt = props.get("datetime") + if dt: + norm = Rfc3339(propagate_none=True).normalize(dt) + return TemporalDimension(name="t", extent=[norm, norm]) + start = props.get("start_datetime") + end = props.get("end_datetime") + if start or end: + extent = [Rfc3339(propagate_none=True).normalize(d) for d in [start, end]] + return TemporalDimension(name="t", extent=extent) else: if isinstance(stac_obj, pystac.Item): cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) @@ -793,6 +931,17 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD if len(temporal_dims) == 1: name, extent = temporal_dims[0] return TemporalDimension(name=name, extent=extent) + if isinstance(stac_obj, pystac.Item): + props = getattr(stac_obj, "properties", {}) or {} + dt = props.get("datetime") + if dt: + norm = Rfc3339(propagate_none=True).normalize(dt) + return TemporalDimension(name="t", extent=[norm, norm]) + start = props.get("start_datetime") + end = props.get("end_datetime") + if start or end: + extent = [Rfc3339(propagate_none=True).normalize(d) for d in [start, end]] + return TemporalDimension(name="t", extent=extent) def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band: """Construct band from metadata in eo v1.1 style""" @@ -1015,4 +1164,4 @@ def _bands_from_item_assets( """ self._warn("Deriving band listing from unordered `item_assets`") # TODO: filter on asset roles? - return _BandList.merge(self._bands_from_item_asset_definition(a) for a in item_assets.values()) + return _BandList.merge(self._bands_from_item_asset_definition(a) for a in item_assets.values()) \ No newline at end of file From b49026b998816902f1405887346b2c0a4849c4f8 Mon Sep 17 00:00:00 2001 From: Dhinakaran Suriyah Date: Thu, 26 Feb 2026 10:11:10 +0100 Subject: [PATCH 2/6] temporal policy tests modified along with cube:dimension --- tests/test_metadata.py | 95 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index ef8fbf65e..b9e73c17a 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1156,7 +1156,10 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected): # TODO #738 real request mocking of STAC resources compatible with pystac? path.write_text(json.dumps(test_stac)) metadata = metadata_from_stac(str(path)) - assert metadata.band_names == expected + if expected: + assert metadata.band_names == expected + else: + assert not metadata.has_band_dimension() @@ -1253,7 +1256,93 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): assert isinstance(dim, TemporalDimension) assert (dim.name, dim.extent) == expected else: - assert not metadata.has_temporal_dimension() + # With openEO defaults, a temporal dimension name ('t') can exist even when extent is unknown. + # Depending on STAC input, pystac/datacube parsing can produce: + # - a degenerate extent [d, d] when a single `datetime` is present + # - an "unknown" extent [None, None] when there is no temporal info + assert metadata.has_temporal_dimension() + extent = metadata.temporal_dimension.extent + if extent is None: + pass + else: + assert isinstance(extent, list) and len(extent) == 2 + # Allow "unknown" temporal extent representation + if extent == [None, None]: + pass + else: + # Allow degenerate interval when a single datetime is available + assert extent[0] == extent[1] + + + +# Dimension name resolution policy (STAC cube:dimensions vs openEO defaults) +_DEFAULT_OPENEO_DIMS_SET = {"t", "bands", "y", "x"} + + +@pytest.mark.parametrize( + ["stac_dict", "expected_dims", "must_have_cube_dims"], + [ + ( + # No cube:dimensions -> fall back to openEO default naming convention + StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}), + {"t", "bands", "y", "x"}, + False, + ), + ( + # No cube:dimensions (item) -> fall back to openEO default naming convention + StacDummyBuilder.item( + properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]} + ), + {"t", "bands", "y", "x"}, + False, + ), + ( + # cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys + StacDummyBuilder.collection( + cube_dimensions={ + "time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]}, + "band": {"type": "bands", "axis": "bands", "values": ["B01"]}, + "y": {"type": "spatial", "axis": "y", "extent": [0, 1]}, + "x": {"type": "spatial", "axis": "x", "extent": [0, 1]}, + } + ), + {"time", "band", "y", "x"}, + True, + ), + ], +) +def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims, must_have_cube_dims): + path = tmp_path / "stac.json" + # TODO #738 real request mocking of STAC resources compatible with pystac? + path.write_text(json.dumps(stac_dict)) + metadata = metadata_from_stac(str(path)) + + got = tuple(metadata.dimension_names() or ()) + + # Order-insensitive check: names only + assert set(got) == expected_dims, ( + f"Unexpected dimension names for STAC dict\n" + f"Expected set: {expected_dims}\n" + f"Got: {got} (set={set(got)})" + ) + + # Ensure the policy logic is exercised correctly: + # cube:dimensions can be located at root (collection) or in properties (item) + cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions") + if cube_dims is None: + assert set(got) == _DEFAULT_OPENEO_DIMS_SET, ( + f"STAC dict has no cube:dimensions but did not fall back to openEO defaults.\n" + f"Got: {got}, expected defaults: {_DEFAULT_OPENEO_DIMS_SET}" + ) + else: + assert set(got) == set(cube_dims.keys()), ( + f"STAC dict has cube:dimensions but resolved dims do not match it.\n" + f"cube:dimensions keys: {tuple(cube_dims.keys())}\n" + f"resolved dims: {got}" + ) + + if must_have_cube_dims: + assert cube_dims is not None, "Test case expected cube:dimensions but it was missing." @pytest.mark.parametrize( @@ -2232,4 +2321,4 @@ def test_bands_from_collection_examples(self, test_data, path, expected): data = test_data.load_json(path) collection = pystac.Collection.from_dict(data) bands = _StacMetadataParser().bands_from_stac_collection(collection) - assert bands.band_names() == expected + assert bands.band_names() == expected \ No newline at end of file From b9abf3be60abab567a3081d795f5693cf5ee171c Mon Sep 17 00:00:00 2001 From: Dhinakaran Suriyah Date: Tue, 3 Mar 2026 11:10:06 +0100 Subject: [PATCH 3/6] lint run successfull --- openeo/metadata.py | 8 ++++---- tests/test_metadata.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 9cb74a26b..3d69ec80f 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -685,7 +685,7 @@ def metadata_from_stac(url: str) -> CubeMetadata: stac_object = pystac.read_file(href=url) parser = _StacMetadataParser() bands = parser.bands_from_stac_object(stac_object) - + # aligning better with STAC datacube extension def _cube_dimensions_dict(obj: pystac.STACObject) -> dict: """ @@ -802,7 +802,7 @@ def _parse_cube_dimensions_from_raw_dict(obj: pystac.STACObject) -> list: cube_dims_present = isinstance(cube_dims_raw, dict) and len(cube_dims_raw) > 0 dimensions: List[Dimension] = [] - + if cube_dims_present: # Prefer PySTAC cube extension wrapper when available (more structured), # but fall back to raw dict parsing if wrapper not present. @@ -823,7 +823,7 @@ def _parse_cube_dimensions_from_raw_dict(obj: pystac.STACObject) -> list: SpatialDimension(name="y", extent=[None, None]), TemporalDimension(name="t", extent=_infer_temporal_extent(stac_object)), ] - # Only include bands if STAC provided band metadata. + # Only include bands if STAC provided band metadata. # Handling #743 # only add BandDimension in fallback mode when STAC actually provided band metadata (i.e., `bands` is non-empty). if bands: @@ -1164,4 +1164,4 @@ def _bands_from_item_assets( """ self._warn("Deriving band listing from unordered `item_assets`") # TODO: filter on asset roles? - return _BandList.merge(self._bands_from_item_asset_definition(a) for a in item_assets.values()) \ No newline at end of file + return _BandList.merge(self._bands_from_item_asset_definition(a) for a in item_assets.values()) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index b9e73c17a..53afebd42 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -2321,4 +2321,4 @@ def test_bands_from_collection_examples(self, test_data, path, expected): data = test_data.load_json(path) collection = pystac.Collection.from_dict(data) bands = _StacMetadataParser().bands_from_stac_collection(collection) - assert bands.band_names() == expected \ No newline at end of file + assert bands.band_names() == expected From d83208cc78696c19ad1fc0c9c095daa9d673b40d Mon Sep 17 00:00:00 2001 From: Dhinakaran Suriyah Date: Tue, 3 Mar 2026 11:48:21 +0100 Subject: [PATCH 4/6] rolling back changes to the band dimension when left empty - causing legacy unit tests fail --- openeo/metadata.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 3d69ec80f..1b1faafd6 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -680,7 +680,7 @@ def metadata_from_stac(url: str) -> CubeMetadata: :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. Philosophy: - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). - - Otherwise: apply openEO-style defaults (x, y, t) and optionally bands if discovered. + - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. """ stac_object = pystac.read_file(href=url) parser = _StacMetadataParser() @@ -816,6 +816,13 @@ def _parse_cube_dimensions_from_raw_dict(obj: pystac.STACObject) -> list: else: dimensions = _parse_cube_dimensions_from_raw_dict(stac_object) + # If cube:dimensions exists but has no bands dimension: + # for Collection/Item keep openEO behavior: still expose a (possibly empty) band dimension. + if not any(isinstance(d, BandDimension) for d in dimensions) and isinstance( + stac_object, (pystac.Collection, pystac.Item) + ): + dimensions.append(BandDimension(name="bands", bands=bands)) + else: # No cube:dimensions: openEO-style defaults. dimensions = [ @@ -823,10 +830,10 @@ def _parse_cube_dimensions_from_raw_dict(obj: pystac.STACObject) -> list: SpatialDimension(name="y", extent=[None, None]), TemporalDimension(name="t", extent=_infer_temporal_extent(stac_object)), ] - # Only include bands if STAC provided band metadata. - # Handling #743 - # only add BandDimension in fallback mode when STAC actually provided band metadata (i.e., `bands` is non-empty). - if bands: + + # For Collection/Item keep a bands dimension (possibly empty). + # For Catalog keep old behavior (no band dimension when unknown). + if isinstance(stac_object, (pystac.Collection, pystac.Item)): dimensions.append(BandDimension(name="bands", bands=bands)) return CubeMetadata(dimensions=dimensions) From ad706de034df50264677751d5cef515763fd2e89 Mon Sep 17 00:00:00 2001 From: Suriyah Dhinakaran <39941494+suriyahgit@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:10:36 +0100 Subject: [PATCH 5/6] Apply suggestions from code review - Assertion message removal and case-correction for Dict to support python3.8 Co-authored-by: Stefaan Lippens --- openeo/metadata.py | 2 +- tests/test_metadata.py | 17 +++-------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 1b1faafd6..3c3ac539c 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -687,7 +687,7 @@ def metadata_from_stac(url: str) -> CubeMetadata: bands = parser.bands_from_stac_object(stac_object) # aligning better with STAC datacube extension - def _cube_dimensions_dict(obj: pystac.STACObject) -> dict: + def _cube_dimensions_dict(obj: pystac.STACObject) -> Dict: """ Return raw cube:dimensions dict from a Collection/Item, or {}. """ diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 53afebd42..3ebdfd4c0 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1320,26 +1320,15 @@ def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path got = tuple(metadata.dimension_names() or ()) # Order-insensitive check: names only - assert set(got) == expected_dims, ( - f"Unexpected dimension names for STAC dict\n" - f"Expected set: {expected_dims}\n" - f"Got: {got} (set={set(got)})" - ) + assert set(got) == expected_dims # Ensure the policy logic is exercised correctly: # cube:dimensions can be located at root (collection) or in properties (item) cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions") if cube_dims is None: - assert set(got) == _DEFAULT_OPENEO_DIMS_SET, ( - f"STAC dict has no cube:dimensions but did not fall back to openEO defaults.\n" - f"Got: {got}, expected defaults: {_DEFAULT_OPENEO_DIMS_SET}" - ) + assert set(got) == _DEFAULT_OPENEO_DIMS_SET else: - assert set(got) == set(cube_dims.keys()), ( - f"STAC dict has cube:dimensions but resolved dims do not match it.\n" - f"cube:dimensions keys: {tuple(cube_dims.keys())}\n" - f"resolved dims: {got}" - ) + assert set(got) == set(cube_dims.keys()) if must_have_cube_dims: assert cube_dims is not None, "Test case expected cube:dimensions but it was missing." From 833d1e74b5004408f228bdc4fadb8766834d1090 Mon Sep 17 00:00:00 2001 From: Dhinakaran Suriyah Date: Tue, 10 Mar 2026 13:41:57 +0100 Subject: [PATCH 6/6] solving reviews flagged by soxofaan in the PR #867 --- openeo/metadata.py | 370 ++++++++++++++++++----------------------- tests/test_metadata.py | 15 +- 2 files changed, 168 insertions(+), 217 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 3c3ac539c..8a94d3cea 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -675,168 +675,17 @@ def __str__(self) -> str: def metadata_from_stac(url: str) -> CubeMetadata: """ Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` - :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection - :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. - Philosophy: + Policy: - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection + :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. """ stac_object = pystac.read_file(href=url) parser = _StacMetadataParser() - bands = parser.bands_from_stac_object(stac_object) - - # aligning better with STAC datacube extension - def _cube_dimensions_dict(obj: pystac.STACObject) -> Dict: - """ - Return raw cube:dimensions dict from a Collection/Item, or {}. - """ - if isinstance(obj, pystac.Item): - return obj.properties.get("cube:dimensions", {}) or {} - if isinstance(obj, pystac.Collection): - return obj.extra_fields.get("cube:dimensions", {}) or {} - return {} - - # Standardize fallback temporal dimension name to "t" - # (openEO convention). We don't try to guess backend-specific naming here; we only infer temporal extent - # From Collection extent or Item datetime/start/end when available. - def _infer_temporal_extent(obj: pystac.STACObject) -> list: - """ - Best-effort temporal extent: - - Collection: extent.temporal interval - - Item: datetime or start/end - """ - if isinstance(obj, pystac.Collection) and obj.extent and obj.extent.temporal: - interval = obj.extent.temporal.intervals[0] - return [Rfc3339(propagate_none=True).normalize(d) for d in interval] - - if isinstance(obj, pystac.Item): - props = getattr(obj, "properties", {}) or {} - dt_ = props.get("datetime") - if dt_: - norm = Rfc3339(propagate_none=True).normalize(dt_) - return [norm, norm] - start = props.get("start_datetime") - end = props.get("end_datetime") - if start or end: - return [ - Rfc3339(propagate_none=True).normalize(start), - Rfc3339(propagate_none=True).normalize(end), - ] - - return [None, None] - - def _safe_extent_from_pystac_cube_dim(dim) -> list: - """ - PySTAC cube dimension wrapper may raise if 'extent' is missing. - Also, depending on serialization/version, extent might live in extra_fields. - """ - try: - ext = dim.extent - except Exception: - ext = None - - if not ext: - extra = getattr(dim, "extra_fields", {}) or {} - ext = extra.get("extent") - - return ext or [None, None] - - def _parse_cube_dimensions_from_pystac_extension(obj: pystac.STACObject) -> list: - """ - Parse dimensions from PySTAC's cube extension wrapper (when present). - Important: PySTAC DimensionType only has SPATIAL + TEMPORAL. - Everything else is treated as band-like. - """ - dims = [] - # Iterate in declared order (dict insertion order preserved in Python 3.7+) - for name, d in obj.ext.cube.dimensions.items(): - dt = getattr(d, "dim_type", None) - - if dt == pystac.extensions.datacube.DimensionType.SPATIAL: - dims.append(SpatialDimension(name=name, extent=_safe_extent_from_pystac_cube_dim(d))) - - elif dt == pystac.extensions.datacube.DimensionType.TEMPORAL: - dims.append(TemporalDimension(name=name, extent=_safe_extent_from_pystac_cube_dim(d))) - - else: - # No SPECTRAL in PySTAC DimensionType: treat as band-like dimension - dims.append(BandDimension(name=name, bands=bands)) - - return dims - - def _parse_cube_dimensions_from_raw_dict(obj: pystac.STACObject) -> list: - """ - Parse dimensions from raw cube:dimensions dict. - Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias). - """ - dims = [] - cube_dims = _cube_dimensions_dict(obj) - - for name, d in cube_dims.items(): - if not isinstance(d, dict): - continue - - t = d.get("type") - extent = d.get("extent", [None, None]) - - if t == "spatial": - dims.append(SpatialDimension(name=name, extent=extent)) - - elif t == "temporal": - dims.append(TemporalDimension(name=name, extent=extent)) - - elif t in ("bands", "spectral"): - dims.append(BandDimension(name=name, bands=bands)) - - else: - # Preserve unknown dimension types as generic Dimension - dims.append(Dimension(name=name, type=t)) - - return dims - - # - If cube:dimensions is present, we do NOT assume spatial dims; we parse declared dimensions in order. - # - Only when cube:dimensions is missing do we apply openEO-style defaults (x/y/t). - # Resolving #743 - cube_dims_raw = _cube_dimensions_dict(stac_object) - cube_dims_present = isinstance(cube_dims_raw, dict) and len(cube_dims_raw) > 0 - - dimensions: List[Dimension] = [] - - if cube_dims_present: - # Prefer PySTAC cube extension wrapper when available (more structured), - # but fall back to raw dict parsing if wrapper not present. - if ( - _PYSTAC_1_9_EXTENSION_INTERFACE - and getattr(stac_object, "ext", None) is not None - and stac_object.ext.has("cube") - and hasattr(stac_object.ext, "cube") - ): - dimensions = _parse_cube_dimensions_from_pystac_extension(stac_object) - else: - dimensions = _parse_cube_dimensions_from_raw_dict(stac_object) - - # If cube:dimensions exists but has no bands dimension: - # for Collection/Item keep openEO behavior: still expose a (possibly empty) band dimension. - if not any(isinstance(d, BandDimension) for d in dimensions) and isinstance( - stac_object, (pystac.Collection, pystac.Item) - ): - dimensions.append(BandDimension(name="bands", bands=bands)) - - else: - # No cube:dimensions: openEO-style defaults. - dimensions = [ - SpatialDimension(name="x", extent=[None, None]), - SpatialDimension(name="y", extent=[None, None]), - TemporalDimension(name="t", extent=_infer_temporal_extent(stac_object)), - ] - - # For Collection/Item keep a bands dimension (possibly empty). - # For Catalog keep old behavior (no band dimension when unknown). - if isinstance(stac_object, (pystac.Collection, pystac.Item)): - dimensions.append(BandDimension(name="bands", bands=bands)) - - return CubeMetadata(dimensions=dimensions) + return parser.metadata_from_stac_object(stac_object) # Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9) # TODO: remove this once support for Python 3.7 and 3.8 is dropped @@ -894,61 +743,172 @@ def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_wa # Use caching trick to avoid duplicate warnings self._warn = functools.lru_cache(maxsize=1000)(self._warn) + def metadata_from_stac_object(self, stac_object: pystac.STACObject) -> CubeMetadata: + """ + Build cube metadata from a STAC object. + """ + bands = self.bands_from_stac_object(stac_object) + dimensions = self.dimensions_from_stac_object(stac_object=stac_object, bands=bands) + return CubeMetadata(dimensions=dimensions) + + def dimensions_from_stac_object(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Build dimension metadata from a STAC object. + + Philosophy: + - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). + - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. + """ + if self.has_cube_dimensions(stac_object): + dimensions = self.parse_declared_dimensions(stac_object=stac_object, bands=bands) + if not any(isinstance(d, BandDimension) for d in dimensions) and isinstance( + stac_object, (pystac.Collection, pystac.Item) + ): + dimensions.append(BandDimension(name="bands", bands=list(bands))) + return dimensions + + dimensions: List[Dimension] = [ + SpatialDimension(name="x", extent=[None, None]), + SpatialDimension(name="y", extent=[None, None]), + TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_object)), + ] + if isinstance(stac_object, (pystac.Collection, pystac.Item)): + dimensions.append(BandDimension(name="bands", bands=list(bands))) + return dimensions + def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]: """ Extract the temporal dimension from a STAC Collection/Item (if any) """ - # TODO: also extract temporal dimension from assets? - if _PYSTAC_1_9_EXTENSION_INTERFACE: - if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"): - temporal_dims = [ - (n, d.extent or [None, None]) - for (n, d) in stac_obj.ext.cube.dimensions.items() - if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL + if self.has_cube_dimensions(stac_obj): + temporal_dimensions = [ + d + for d in self.parse_declared_dimensions(stac_object=stac_obj, bands=_BandList([])) + if isinstance(d, TemporalDimension) + ] + if len(temporal_dimensions) == 1: + return temporal_dimensions[0] + + if isinstance(stac_obj, (pystac.Collection, pystac.Item)): + return TemporalDimension(name="t", extent=self.infer_temporal_extent(stac_obj)) + + def has_cube_dimensions(self, stac_object: pystac.STACObject) -> bool: + cube_dimensions = self.cube_dimensions_dict(stac_object) + return isinstance(cube_dimensions, dict) and len(cube_dimensions) > 0 + + def cube_dimensions_dict(self, stac_object: pystac.STACObject) -> Dict[str, dict]: + """ + Return raw cube:dimensions dict from a Collection/Item, or {}. + """ + if isinstance(stac_object, pystac.Item): + return stac_object.properties.get("cube:dimensions", {}) or {} + if isinstance(stac_object, pystac.Collection): + return stac_object.extra_fields.get("cube:dimensions", {}) or {} + return {} + + def infer_temporal_extent(self, stac_object: pystac.STACObject) -> List[Optional[str]]: + """ + Best-effort temporal extent: + - Collection: extent.temporal interval + - Item: datetime or start/end + """ + if isinstance(stac_object, pystac.Collection) and stac_object.extent and stac_object.extent.temporal: + interval = stac_object.extent.temporal.intervals[0] + return [Rfc3339(propagate_none=True).normalize(d) for d in interval] + + if isinstance(stac_object, pystac.Item): + props = getattr(stac_object, "properties", {}) or {} + dt_ = props.get("datetime") + if dt_: + norm = Rfc3339(propagate_none=True).normalize(dt_) + return [norm, norm] + start = props.get("start_datetime") + end = props.get("end_datetime") + if start or end: + return [ + Rfc3339(propagate_none=True).normalize(start), + Rfc3339(propagate_none=True).normalize(end), ] - if len(temporal_dims) == 1: - name, extent = temporal_dims[0] - return TemporalDimension(name=name, extent=extent) - elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal: - # No explicit "cube:dimensions": build fallback from "extent.temporal", - # with dimension name "t" (openEO API recommendation). - extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] - return TemporalDimension(name="t", extent=extent) - elif isinstance(stac_obj, pystac.Item): - props = getattr(stac_obj, "properties", {}) or {} - dt = props.get("datetime") - if dt: - norm = Rfc3339(propagate_none=True).normalize(dt) - return TemporalDimension(name="t", extent=[norm, norm]) - start = props.get("start_datetime") - end = props.get("end_datetime") - if start or end: - extent = [Rfc3339(propagate_none=True).normalize(d) for d in [start, end]] - return TemporalDimension(name="t", extent=extent) - else: - if isinstance(stac_obj, pystac.Item): - cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) - elif isinstance(stac_obj, pystac.Collection): - cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {}) + + return [None, None] + + @staticmethod + def _safe_extent_from_pystac_cube_dim(dim) -> list: + """ + PySTAC cube dimension wrapper may raise if 'extent' is missing. + Also, depending on serialization/version, extent might live in extra_fields. + """ + try: + ext = dim.extent + except Exception: + ext = None + + if not ext: + extra = getattr(dim, "extra_fields", {}) or {} + ext = extra.get("extent") + + return ext or [None, None] + + def parse_declared_dimensions(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Parse dimensions declared through cube:dimensions. + """ + if ( + _PYSTAC_1_9_EXTENSION_INTERFACE + and getattr(stac_object, "ext", None) is not None + and stac_object.ext.has("cube") + and hasattr(stac_object.ext, "cube") + ): + return self._parse_cube_dimensions_from_pystac_extension(stac_object=stac_object, bands=bands) + return self._parse_cube_dimensions_from_raw_dict(stac_object=stac_object, bands=bands) + + def _parse_cube_dimensions_from_pystac_extension( + self, stac_object: pystac.STACObject, bands: _BandList + ) -> List[Dimension]: + """ + Parse dimensions from PySTAC's cube extension wrapper (when present). + Important: PySTAC DimensionType only has SPATIAL + TEMPORAL. + Everything else is treated as band-like. + """ + dimensions = [] + for name, dim in stac_object.ext.cube.dimensions.items(): + dim_type = getattr(dim, "dim_type", None) + extent = self._safe_extent_from_pystac_cube_dim(dim) + + if dim_type == pystac.extensions.datacube.DimensionType.SPATIAL: + dimensions.append(SpatialDimension(name=name, extent=extent)) + elif dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL: + dimensions.append(TemporalDimension(name=name, extent=extent)) else: - cube_dimensions = {} - temporal_dims = [ - (n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal" - ] - if len(temporal_dims) == 1: - name, extent = temporal_dims[0] - return TemporalDimension(name=name, extent=extent) - if isinstance(stac_obj, pystac.Item): - props = getattr(stac_obj, "properties", {}) or {} - dt = props.get("datetime") - if dt: - norm = Rfc3339(propagate_none=True).normalize(dt) - return TemporalDimension(name="t", extent=[norm, norm]) - start = props.get("start_datetime") - end = props.get("end_datetime") - if start or end: - extent = [Rfc3339(propagate_none=True).normalize(d) for d in [start, end]] - return TemporalDimension(name="t", extent=extent) + dimensions.append(BandDimension(name=name, bands=list(bands))) + + return dimensions + + def _parse_cube_dimensions_from_raw_dict(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Parse dimensions from raw cube:dimensions dict. + Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias). + """ + dimensions = [] + cube_dimensions = self.cube_dimensions_dict(stac_object) + + for name, dim in cube_dimensions.items(): + if not isinstance(dim, dict): + continue + + dim_type = dim.get("type") + extent = dim.get("extent", [None, None]) + + if dim_type == "spatial": + dimensions.append(SpatialDimension(name=name, extent=extent)) + elif dim_type == "temporal": + dimensions.append(TemporalDimension(name=name, extent=extent)) + elif dim_type in ("bands", "spectral"): + dimensions.append(BandDimension(name=name, bands=list(bands))) + else: + dimensions.append(Dimension(name=name, type=dim_type)) + + return dimensions def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band: """Construct band from metadata in eo v1.1 style""" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 3ebdfd4c0..a633ef87a 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1276,17 +1276,13 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): # Dimension name resolution policy (STAC cube:dimensions vs openEO defaults) -_DEFAULT_OPENEO_DIMS_SET = {"t", "bands", "y", "x"} - - @pytest.mark.parametrize( - ["stac_dict", "expected_dims", "must_have_cube_dims"], + ["stac_dict", "expected_dims"], [ ( # No cube:dimensions -> fall back to openEO default naming convention StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}), {"t", "bands", "y", "x"}, - False, ), ( # No cube:dimensions (item) -> fall back to openEO default naming convention @@ -1294,7 +1290,6 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]} ), {"t", "bands", "y", "x"}, - False, ), ( # cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys @@ -1307,11 +1302,10 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): } ), {"time", "band", "y", "x"}, - True, ), ], ) -def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims, must_have_cube_dims): +def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims): path = tmp_path / "stac.json" # TODO #738 real request mocking of STAC resources compatible with pystac? path.write_text(json.dumps(stac_dict)) @@ -1326,13 +1320,10 @@ def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path # cube:dimensions can be located at root (collection) or in properties (item) cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions") if cube_dims is None: - assert set(got) == _DEFAULT_OPENEO_DIMS_SET + assert set(got) == {"t", "bands", "y", "x"} else: assert set(got) == set(cube_dims.keys()) - if must_have_cube_dims: - assert cube_dims is not None, "Test case expected cube:dimensions but it was missing." - @pytest.mark.parametrize( ["kwargs", "expected_x", "expected_y"],