Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-11,1580;1582,HIGH,1;3;4;5,"write_geotiff_gpu silently mis-ordered (band, y, x) DataArrays (#1580). to_geotiff ignored rioxarray nodatavals and CF _FillValue attrs (#1582). Both fixed in same branch."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
67 changes: 65 additions & 2 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,61 @@ def _apply_nodata_mask_gpu(arr_gpu, nodata):
_TIFF_SHORT = 3


def _resolve_nodata_attr(attrs: dict):
"""Resolve a NoData sentinel from DataArray attrs.

xrspatial's own readers always emit ``attrs['nodata']`` (a scalar),
so that key is checked first for a clean intra-library round-trip.
Falls back to two ecosystem conventions on miss:

* ``attrs['nodatavals']`` -- rioxarray's per-band tuple. Returns
the first entry that is not None, not non-numeric, and not NaN.
In practice this is band 0 for almost every real file; the skip
logic only matters when band 0 is missing a sentinel (NaN /
None) while a later band declares one. Bands with mixed concrete
sentinels are uncommon and would need an explicit ``nodata=``
argument anyway.
* ``attrs['_FillValue']`` -- CF-style xarray pipelines.

Returns ``None`` when none of the keys carry a usable value. NaN
entries in ``nodatavals`` are skipped rather than treated as a
sentinel (NaN means "the float NaN is the sentinel", which is
already the default and doesn't need a GDAL_NODATA tag).
"""
nodata = attrs.get('nodata')
if nodata is not None:
return nodata

vals = attrs.get('nodatavals')
if vals is not None:
try:
seq = list(vals)
except TypeError:
seq = [vals]
for v in seq:
if v is None:
continue
try:
fv = float(v)
except (TypeError, ValueError):
continue
if np.isnan(fv):
continue
return v

fill = attrs.get('_FillValue')
if fill is not None:
try:
ffv = float(fill)
except (TypeError, ValueError):
return fill # non-numeric -- pass through verbatim
if np.isnan(ffv):
return None
return fill

return None


def _merge_friendly_extra_tags(extra_tags_list, attrs: dict) -> list | None:
"""Combine ``attrs['extra_tags']`` with friendly tag attrs.

Expand Down Expand Up @@ -954,7 +1009,7 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
if epsg is None and wkt_fallback is None:
wkt_fallback = wkt
if nodata is None:
nodata = data.attrs.get('nodata')
nodata = _resolve_nodata_attr(data.attrs)
if data.attrs.get('raster_type') == 'point':
raster_type = RASTER_PIXEL_IS_POINT
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
Expand Down Expand Up @@ -2237,6 +2292,14 @@ def write_geotiff_gpu(data, path: str, *,
else:
arr = cupy.asarray(np.asarray(arr)) # numpy -> GPU

# Handle band-first dimension order (band, y, x) -> (y, x, band).
# rioxarray and CF-style multi-band rasters land here; without
# this remap the writer treats arr.shape[2] as the band axis and
# produces a transposed file (issue #1580). The CPU writer does
# the same remap at the matching step in to_geotiff().
if arr.ndim == 3 and data.dims[0] in ('band', 'bands', 'channel'):
arr = cupy.ascontiguousarray(cupy.moveaxis(arr, 0, -1))

# Prefer attrs['transform'] over the coord-derived transform: it
# is bit-stable across round-trips, while _coords_to_transform
# can drift on fractional pixel sizes (the same reasoning the
Expand Down Expand Up @@ -2264,7 +2327,7 @@ def write_geotiff_gpu(data, path: str, *,
if epsg is None and wkt_fallback is None:
wkt_fallback = wkt
if nodata is None:
nodata = data.attrs.get('nodata')
nodata = _resolve_nodata_attr(data.attrs)
if data.attrs.get('raster_type') == 'point':
raster_type = RASTER_PIXEL_IS_POINT
# Mirror the CPU writer's pass-through of GDAL metadata, the
Expand Down
169 changes: 169 additions & 0 deletions xrspatial/geotiff/tests/test_gpu_writer_band_first_1580.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""Regression test for issue #1580.

``write_geotiff_gpu`` used to assume the CuPy-backed DataArray was
``(y, x[, band])`` and computed ``height, width = arr.shape[:2]``
unconditionally. A rioxarray-style ``(band, y, x)`` 3D DataArray --
auto-dispatched into the GPU writer from ``to_geotiff`` -- ended up
written with the band axis stored as image width and the actual width
stored as samples-per-pixel.

The CPU eager path in ``to_geotiff`` already handles this with a
``np.moveaxis(arr, 0, -1)`` when ``data.dims[0] in ('band', 'bands',
'channel')``; the GPU writer now mirrors that remap so both backends
produce the same file for the same input.
"""
from __future__ import annotations

import importlib.util

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff, write_geotiff_gpu


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return bool(cupy.cuda.is_available())
except Exception:
return False


_HAS_GPU = _gpu_available()
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")


@_gpu_only
@pytest.mark.parametrize("band_dim_name", ["band", "bands", "channel"])
def test_band_first_layout_written_correctly_via_write_geotiff_gpu(
tmp_path, band_dim_name):
"""Direct call to write_geotiff_gpu with a (band, y, x) CuPy
DataArray must produce the same file dimensions as the CPU writer
would (height=y, width=x, samples=band).
"""
import cupy
rng = np.random.default_rng(seed=1580)
arr_bhw = rng.integers(0, 255, (3, 16, 32), dtype=np.uint8)
da = xr.DataArray(
cupy.asarray(arr_bhw),
dims=[band_dim_name, "y", "x"],
coords={
band_dim_name: np.arange(3),
"y": np.arange(16, dtype=np.float64),
"x": np.arange(32, dtype=np.float64),
},
attrs={"crs": 4326},
)

out = str(tmp_path / f"band_first_1580_{band_dim_name}.tif")
write_geotiff_gpu(da, out, compression="none")

rd = open_geotiff(out)
assert rd.sizes["y"] == 16, (
f"expected height=16, got {rd.sizes}; the writer is treating the "
f"band axis as height"
)
assert rd.sizes["x"] == 32, f"expected width=32, got {rd.sizes}"
assert rd.sizes["band"] == 3, (
f"expected 3 bands, got {rd.sizes}; the writer is treating the "
f"width as samples-per-pixel"
)

# Pixel values should match the source after the (band, y, x) ->
# (y, x, band) remap.
np.testing.assert_array_equal(rd.values, np.moveaxis(arr_bhw, 0, -1))


@_gpu_only
def test_band_first_layout_via_to_geotiff_auto_dispatch(tmp_path):
"""The user-facing path: pass a CuPy (band, y, x) DataArray to
``to_geotiff`` and let auto-detection pick the GPU writer.
"""
import cupy
rng = np.random.default_rng(seed=1580 + 1)
arr_bhw = rng.integers(0, 255, (2, 8, 12), dtype=np.uint8)
da = xr.DataArray(
cupy.asarray(arr_bhw),
dims=["band", "y", "x"],
coords={
"band": np.arange(2),
"y": np.arange(8, dtype=np.float64),
"x": np.arange(12, dtype=np.float64),
},
attrs={"crs": 4326},
)

out = str(tmp_path / "band_first_1580_autodispatch.tif")
to_geotiff(da, out, compression="none")

rd = open_geotiff(out)
assert rd.sizes == {"y": 8, "x": 12, "band": 2}, (
f"auto-dispatched GPU write produced wrong dims: {rd.sizes}"
)
np.testing.assert_array_equal(rd.values, np.moveaxis(arr_bhw, 0, -1))


@_gpu_only
def test_yxbands_layout_unchanged(tmp_path):
"""Regression guard: the original (y, x, band) layout must still
write correctly after the band-first remap was added.
"""
import cupy
rng = np.random.default_rng(seed=1580 + 2)
arr_yxb = rng.integers(0, 255, (8, 12, 2), dtype=np.uint8)
da = xr.DataArray(
cupy.asarray(arr_yxb),
dims=["y", "x", "band"],
coords={
"y": np.arange(8, dtype=np.float64),
"x": np.arange(12, dtype=np.float64),
"band": np.arange(2),
},
attrs={"crs": 4326},
)

out = str(tmp_path / "yxb_1580.tif")
write_geotiff_gpu(da, out, compression="none")

rd = open_geotiff(out)
assert rd.sizes == {"y": 8, "x": 12, "band": 2}
np.testing.assert_array_equal(rd.values, arr_yxb)


@_gpu_only
def test_gpu_band_first_matches_cpu_byte_for_byte_on_pixel_values(tmp_path):
"""Cross-backend parity: GPU and CPU writers must emit the same
pixel values for the same (band, y, x) input.
"""
import cupy
rng = np.random.default_rng(seed=1580 + 3)
arr_bhw = rng.integers(0, 255, (3, 24, 40), dtype=np.uint8)
da_cpu = xr.DataArray(
arr_bhw,
dims=["band", "y", "x"],
coords={"band": np.arange(3),
"y": np.arange(24, dtype=np.float64),
"x": np.arange(40, dtype=np.float64)},
attrs={"crs": 4326},
)
da_gpu = xr.DataArray(
cupy.asarray(arr_bhw),
dims=["band", "y", "x"],
coords={"band": np.arange(3),
"y": np.arange(24, dtype=np.float64),
"x": np.arange(40, dtype=np.float64)},
attrs={"crs": 4326},
)

cpu_path = str(tmp_path / "band_first_1580_cpu.tif")
gpu_path = str(tmp_path / "band_first_1580_gpu.tif")
to_geotiff(da_cpu, cpu_path, compression="none", gpu=False)
write_geotiff_gpu(da_gpu, gpu_path, compression="none")

cpu_rd = open_geotiff(cpu_path).values
gpu_rd = open_geotiff(gpu_path).values
np.testing.assert_array_equal(cpu_rd, gpu_rd)
Loading
Loading