From 292721ecd94ada4688f073818540d8e56b259398 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 21:08:24 -0700 Subject: [PATCH] geotiff: VRT writer emits Int64/UInt64 for 64-bit integer sources (#1833) The dtype lookup in write_vrt had no entry for bps=64, so signed 64-bit sources fell back to Int32 and unsigned 64-bit sources fell back to Byte. The VRT reader has UInt64/Int64 support already (issue #1783), so the loss happened on write -- uint64 values silently truncated to [0, 255]. Add 64-bit entries to both lookups so write_vrt and to_geotiff(..., '*.vrt') round-trip 64-bit integer rasters losslessly. --- xrspatial/geotiff/_vrt.py | 8 +- .../tests/test_vrt_writer_int64_1833.py | 75 +++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 49e38708..553d939f 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -1363,9 +1363,13 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: if sf == 3: vrt_dtype_name = 'Float64' if bps == 64 else 'Float32' elif sf == 2: - vrt_dtype_name = {8: 'Int8', 16: 'Int16', 32: 'Int32'}.get(bps, 'Int32') + vrt_dtype_name = { + 8: 'Int8', 16: 'Int16', 32: 'Int32', 64: 'Int64', + }.get(bps, 'Int32') else: - vrt_dtype_name = {8: 'Byte', 16: 'UInt16', 32: 'UInt32'}.get(bps, 'Byte') + vrt_dtype_name = { + 8: 'Byte', 16: 'UInt16', 32: 'UInt32', 64: 'UInt64', + }.get(bps, 'Byte') srs = crs_wkt or first.get('crs_wkt') or '' nd = nodata if nodata is not None else first.get('nodata') diff --git a/xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py b/xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py new file mode 100644 index 00000000..7e4b0751 --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_writer_int64_1833.py @@ -0,0 +1,75 @@ +"""Regression tests for VRT writer 64-bit integer dtype handling. + +``write_vrt`` (and ``to_geotiff(da, "*.vrt")`` by extension) previously +mapped signed 64-bit source rasters to ``Int32`` and unsigned 64-bit +source rasters to ``Byte`` because the dtype lookup had no entry for +``bps=64`` and fell back to the small-int default. The VRT reader has +explicit ``UInt64`` / ``Int64`` support (see issue #1783), so the loss +happened on write -- silently truncating uint64 values to ``[0, 255]``. + +See issue #1833. +""" +from __future__ import annotations + +import re + +import numpy as np +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +def _da(arr: np.ndarray) -> xr.DataArray: + h, w = arr.shape + return xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(h, dtype=np.float64), + 'x': np.arange(w, dtype=np.float64)}, + attrs={'res': (1.0, 1.0)}, + ) + + +def _read_vrt_dtype_attr(vrt_path: str) -> str: + """Extract the ``dataType`` attribute from the emitted VRT XML.""" + with open(vrt_path) as f: + xml = f.read() + m = re.search(r'dataType="([^"]+)"', xml) + assert m is not None, f"no dataType attribute in VRT:\n{xml}" + return m.group(1) + + +def test_uint64_vrt_writer_declares_uint64(tmp_path): + big = np.iinfo(np.uint64).max + arr = np.array([[1, 2], [big - 7, big]], dtype=np.uint64) + vrt = tmp_path / 'u64_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + assert _read_vrt_dtype_attr(str(vrt)) == 'UInt64' + + +def test_int64_vrt_writer_declares_int64(tmp_path): + info = np.iinfo(np.int64) + arr = np.array([[info.min, -1], [0, info.max]], dtype=np.int64) + vrt = tmp_path / 'i64_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + assert _read_vrt_dtype_attr(str(vrt)) == 'Int64' + + +def test_uint64_vrt_round_trip(tmp_path): + big = np.iinfo(np.uint64).max + arr = np.array([[1, 2], [big - 7, big]], dtype=np.uint64) + vrt = tmp_path / 'u64_rt_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + r = open_geotiff(str(vrt)) + assert r.dtype == np.uint64 + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_int64_vrt_round_trip(tmp_path): + info = np.iinfo(np.int64) + arr = np.array([[info.min, -1], [0, info.max]], dtype=np.int64) + vrt = tmp_path / 'i64_rt_1833.vrt' + to_geotiff(_da(arr), str(vrt)) + r = open_geotiff(str(vrt)) + assert r.dtype == np.int64 + np.testing.assert_array_equal(np.asarray(r.values), arr)