diff --git a/ci/doc.yml b/ci/doc.yml
index 414cd07..2f829e6 100644
--- a/ci/doc.yml
+++ b/ci/doc.yml
@@ -4,7 +4,7 @@ channels:
 dependencies:
   - cupy-core
   - pip
-  - python=3.10
+  - python=3.13
   - sphinx
   - sphinx-design
   - sphinx-copybutton
@@ -18,3 +18,4 @@ dependencies:
   - pip:
       # relative to this file. Needs to be editable to be accepted.
       - --editable ..
+      - cog3pio[cuda] @ git+https://github.com/weiji14/cog3pio.git@178a3ffb8163c97f7af9e71bc68b6545a4e8e192  # https://github.com/weiji14/cog3pio/pull/71
diff --git a/cupy_xarray/__init__.py b/cupy_xarray/__init__.py
index 5c3a06c..7581f95 100644
--- a/cupy_xarray/__init__.py
+++ b/cupy_xarray/__init__.py
@@ -1,4 +1,9 @@
 from . import _version
-from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor  # noqa
+from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor  # noqa: F401
+
+try:
+    from .cog3pio import Cog3pioBackendEntrypoint  # noqa: F401
+except ImportError:  # cog3pio is an optional dependency
+    pass
 
 __version__ = _version.get_versions()["version"]
diff --git a/cupy_xarray/cog3pio.py b/cupy_xarray/cog3pio.py
new file mode 100644
index 0000000..7c2456a
--- /dev/null
+++ b/cupy_xarray/cog3pio.py
@@ -0,0 +1,116 @@
+"""
+`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory.
+"""
+
+import os
+from collections.abc import Iterable
+
+import cupy as cp  # type: ignore[import-untyped]
+import numpy as np
+import xarray as xr
+from cog3pio import CudaCogReader
+from xarray.backends import BackendEntrypoint
+
+
+# %%
+class Cog3pioBackendEntrypoint(BackendEntrypoint):
+    """
+    Xarray backend to read GeoTIFF files using 'cog3pio' engine.
+
+    When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the optional
+    ``device_id`` parameter can be set to the CUDA GPU id to do the decoding on.
+
+    Examples
+    --------
+    Read a GeoTIFF from a HTTP url into an :py:class:`xarray.DataArray`:
+
+    >>> import xarray as xr
+    >>> # Read GeoTIFF into an xarray.DataArray
+    >>> dataarray: xr.DataArray = xr.open_dataarray(
+    ...     filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte_zstd.tif",
+    ...     engine="cog3pio",
+    ...     device_id=0,  # cuda:0
+    ... )
+    >>> dataarray.sizes
+    Frozen({'band': 1, 'y': 20, 'x': 20})
+    >>> dataarray.dtype
+    dtype('uint8')
+
+    """
+
+    description = "Use .tif files in Xarray"
+    open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
+    url = "https://github.com/weiji14/cog3pio"
+
+    def open_dataset(  # type: ignore[override]
+        self,
+        filename_or_obj: str,
+        *,
+        drop_variables: str | Iterable[str] | None = None,
+        device_id: int | None = None,
+        # other backend specific keyword arguments
+        # `chunks` and `cache` DO NOT go here, they are handled by xarray
+        mask_and_scale=None,
+    ) -> xr.Dataset:
+        """
+        Backend open_dataset method used by Xarray in :py:func:`xarray.open_dataset`.
+
+        Parameters
+        ----------
+        filename_or_obj : str
+            File path or url to a TIFF (.tif) image file that can be read by the
+            nvTIFF or image-tiff backend library.
+        drop_variables : str | Iterable[str] | None
+            Accepted but currently ignored by this backend.
+        device_id : int | None
+            CUDA device ID on which to place the created cupy array. Default is None,
+            which means device_id will be inferred via
+            :py:func:`cupy.cuda.runtime.getDevice`.
+        mask_and_scale : optional
+            Accepted but currently ignored by this backend.
+
+        Returns
+        -------
+        xarray.Dataset
+            Dataset with a single ``raster`` variable of dimensions (band, y, x).
+
+        """
+        if device_id is None:
+            device_id = cp.cuda.runtime.getDevice()
+
+        with cp.cuda.Stream(ptds=True):
+            cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
+            array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
+            x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
+            height, width = (len(y_coords), len(x_coords))
+            channels: int = len(array_) // (height * width)
+            # TODO make API to get proper 3-D shape directly, or use cuTENSOR
+            array_ = array_.reshape(height, width, channels)  # HWC
+            array = array_.transpose(2, 0, 1)  # CHW
+
+        dataarray: xr.DataArray = xr.DataArray(
+            data=array,
+            dims=("band", "y", "x"),
+            coords={
+                "band": np.arange(channels, dtype=np.uint8),
+                "y": y_coords,
+                "x": x_coords,
+            },
+            name=None,
+            attrs=None,
+        )
+
+        return dataarray.to_dataset(name="raster")
+
+    def guess_can_open(self, filename_or_obj):
+        """
+        Return True if ``filename_or_obj`` has a ``.tif`` or ``.tiff`` file extension.
+
+        The extension is compared case-insensitively. Objects for which
+        :py:func:`os.path.splitext` raises ``TypeError`` are never claimed.
+        """
+        try:
+            _, ext = os.path.splitext(filename_or_obj)
+        except TypeError:
+            return False
+        return ext.lower() in {".tif", ".tiff"}
diff --git a/cupy_xarray/tests/test_cog3pio.py b/cupy_xarray/tests/test_cog3pio.py
new file mode 100644
index 0000000..b82ea96
--- /dev/null
+++ b/cupy_xarray/tests/test_cog3pio.py
@@ -0,0 +1,61 @@
+"""
+Tests for xarray 'cog3pio' backend engine.
+"""
+
+import cupy as cp
+import pytest
+import xarray as xr
+
+# Skip this module when cog3pio is missing. This must run before the import below,
+# because cupy_xarray.cog3pio imports cog3pio at module level.
+cog3pio = pytest.importorskip("cog3pio")
+
+from cupy_xarray.cog3pio import Cog3pioBackendEntrypoint  # noqa: E402
+
+
+# %%
+def test_entrypoint():
+    assert "cog3pio" in xr.backends.list_engines()
+
+
+def test_xarray_backend_open_dataarray():
+    """
+    Ensure that passing engine='cog3pio' to xarray.open_dataarray works to read a
+    Cloud-optimized GeoTIFF from a http url.
+    """
+    with xr.open_dataarray(
+        filename_or_obj="https://github.com/developmentseed/titiler/raw/1.2.0/src/titiler/mosaic/tests/fixtures/TCI.tif",
+        engine=Cog3pioBackendEntrypoint,
+        device_id=0,
+    ) as da:
+        assert isinstance(da.data, cp.ndarray)
+        assert da.sizes == {"band": 3, "y": 1098, "x": 1098}
+        assert da.x.min() == 700010.0
+        assert da.x.max() == 809710.0
+        assert da.y.min() == 3490250.0
+        assert da.y.max() == 3599950.0
+        assert da.dtype == "uint8"
+
+
+def test_xarray_backend_open_mfdataset():
+    """
+    Ensure that passing engine='cog3pio' to xarray.open_mfdataset works to read multiple
+    Cloud-optimized GeoTIFF files from http urls. Also testing that `device_id=None`
+    works.
+    """
+    ds: xr.Dataset = xr.open_mfdataset(
+        paths=[
+            "https://github.com/developmentseed/titiler/raw/1.2.0/src/titiler/mosaic/tests/fixtures/B01.tif",
+            "https://github.com/developmentseed/titiler/raw/1.2.0/src/titiler/mosaic/tests/fixtures/B09.tif",
+        ],
+        engine=Cog3pioBackendEntrypoint,
+        concat_dim="band",
+        combine="nested",
+        device_id=None,
+    )
+    assert ds.sizes == {"band": 2, "y": 183, "x": 183}
+    assert ds.x.min() == 700260.0
+    assert ds.x.max() == 809460.0
+    assert ds.y.min() == 3490500.0
+    assert ds.y.max() == 3599700.0
+    assert ds.raster.dtype == "uint16"
diff --git a/docs/api.rst b/docs/api.rst
index 70d22b0..4aef63a 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -51,3 +51,19 @@ Methods
 
    Dataset.cupy.as_cupy
    Dataset.cupy.as_numpy
+
+
+Backends
+--------
+
+cog3pio
+~~~~~~~
+
+.. currentmodule:: cupy_xarray
+
+.. automodule:: cupy_xarray.cog3pio
+
+.. autosummary::
+   :toctree: generated/
+
+   Cog3pioBackendEntrypoint
diff --git a/docs/conf.py b/docs/conf.py
index 2dffa80..9b920fa 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -54,9 +54,10 @@ nb_execution_mode = "off"
 
 
 intersphinx_mapping = {
-    "python": ("https://docs.python.org/3/", None),
-    "dask": ("https://docs.dask.org/en/latest", None),
+    "cog3pio": ("https://cog3pio.readthedocs.io/en/latest", None),
     "cupy": ("https://docs.cupy.dev/en/latest", None),
+    "dask": ("https://docs.dask.org/en/latest", None),
+    "python": ("https://docs.python.org/3/", None),
     "xarray": ("http://docs.xarray.dev/en/latest/", None),
 }
 
diff --git a/pyproject.toml b/pyproject.toml
index 106967b..71ea26a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,9 @@ dependencies = [
     "xarray>=2024.02.0",
 ]
 
+[project.entry-points."xarray.backends"]
+cog3pio = "cupy_xarray.cog3pio:Cog3pioBackendEntrypoint"
+
 [project.optional-dependencies]
 test = [
     "dask",