Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ci/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
dependencies:
- cupy-core
- pip
- python=3.10
- python=3.13
- sphinx
- sphinx-design
- sphinx-copybutton
Expand All @@ -18,3 +18,4 @@ dependencies:
- pip:
# relative to this file. Needs to be editable to be accepted.
- --editable ..
- cog3pio[cuda] @ git+https://github.com/weiji14/cog3pio.git@178a3ffb8163c97f7af9e71bc68b6545a4e8e192 # https://github.com/weiji14/cog3pio/pull/71
3 changes: 2 additions & 1 deletion cupy_xarray/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from . import _version
from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa
from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa: F401
from .cog3pio import Cog3pioBackendEntrypoint # noqa: F401

__version__ = _version.get_versions()["version"]
100 changes: 100 additions & 0 deletions cupy_xarray/cog3pio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory.
"""

import os
from collections.abc import Iterable

import cupy as cp # type: ignore[import-untyped]
import numpy as np
import xarray as xr
from cog3pio import CudaCogReader
from xarray.backends import BackendEntrypoint


# %%
class Cog3pioBackendEntrypoint(BackendEntrypoint):
"""
Xarray backend to read GeoTIFF files using 'cog3pio' engine.

When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the
``device_id`` parameter can be set to the CUDA GPU id to do the decoding on.

Examples
--------
Read a GeoTIFF from a HTTP url into an [xarray.DataArray][]:

>>> import xarray as xr
>>> # Read GeoTIFF into an xarray.DataArray
>>> dataarray: xr.DataArray = xr.open_dataarray(
... filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte_zstd.tif",
... engine="cog3pio",
... device_id=0, # cuda:0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would this be handled on a multi-GPU system? You may want to load many tif files into a dask-cupy-xarray object where different chunks are on different GPUs. This API feels a little inflexible for this use case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly the feedback I needed! Short answer is: I'm probably gonna change the signature of this parameter to device_id: int | None = None. Where the default of None means to get the 'current device' from cp.cuda.runtime.getDevice().

Longer answer is: I'm currently using nvtiffDecoderCreateSimple() which uses the default memory allocator. The multi-gpu case would probably mean I need to use nvtiffDecoderCreate instead that allows a custom device allocator, which I presume dask will have some way of handling. I see dask's scope as more to do with parallel compute, not I/O from a file format, so would appreciate any advice here (the xarray <-> dask integration piece has always felt very CPU-centric to me 🙂)

Note

Alternatively, I also considered having the parameter as just device to take in a cupy.cuda.Device object. I didn't go with this option (yet) because I'd prefer to have something more cross-framework (e.g. allow torch.cuda.device or tf.device) to get the device_id, something touched on in data-apis/array-api#972 which proposes a __dlpack_device__() protocol.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the default of None means to get the 'current device'

This would probably be fine for a multi-GPU setup. Generally the NVIDIA_VISIBLE_DEVICES env var is set to a unique index for each worker (in Dask this is something dask_cuda.LocalCUDACluster and dask_cuda.CUDAWorker handle), so when a worker uses the "current device" it would be different for each worker.

I see dask's scope as more to do with parallel compute, not I/O from a file format

It's just a task scheduler with some high-level collections. It doesn't matter if the task is compute, IO or anything else (is there anything else? 😅). But overall you need to think about how the high-level collection object filters down to the lower level Dask calls.

If I have a VM with four GPUs, and I call something along the lines of xr.open_mfdataset(filename_or_obj="mytiffs/*.tiff", engine="cog3pio") you want to avoid being explicit with the device otherwise everything will end up on one device and wasting the other three.

the xarray <-> dask integration piece has always felt very CPU-centric to me 🙂

It's true that dask-cuda is a separate package that adds GPU logic to Dask. But GPUs are well supported in Dask today. There may just be work to be done wiring things up to collections like xarray.

... )
>>> dataarray.sizes
Frozen({'band': 1, 'y': 20, 'x': 20})
>>> dataarray.dtype
dtype('uint8')

"""

description = "Use .tif files in Xarray"
open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
url = "https://github.com/weiji14/cog3pio"

def open_dataset(  # type: ignore[override]
    self,
    filename_or_obj: str,
    *,
    drop_variables: str | Iterable[str] | None = None,
    device_id: int | None = None,
    # other backend specific keyword arguments
    # `chunks` and `cache` DO NOT go here, they are handled by xarray
    mask_and_scale=None,
) -> xr.Dataset:
    """
    Backend open_dataset method used by Xarray in [xarray.open_dataset][].

    Parameters
    ----------
    filename_or_obj : str
        File path or url to a TIFF (.tif) image file that can be read by the
        nvTIFF or image-tiff backend library.
    drop_variables : str or iterable of str, optional
        Accepted for compatibility with the xarray backend protocol;
        currently unused because the file is opened as a single variable.
    device_id : int, optional
        CUDA device ID on which to place the created cupy array. Defaults to
        the 'current device' reported by ``cupy.cuda.runtime.getDevice()``,
        so multi-GPU schedulers (e.g. dask-cuda workers, which each set a
        different current device) decode onto their own GPU instead of all
        landing on one hard-coded device.

    Returns
    -------
    xarray.Dataset
        Dataset with a single 'raster' variable of shape (band, y, x),
        backed by a ``cupy.ndarray`` in GPU memory.

    """
    if device_id is None:
        # Resolve to the caller's current CUDA device at call time rather
        # than pinning a fixed GPU in the signature.
        device_id = cp.cuda.runtime.getDevice()

    with cp.cuda.Stream(ptds=True):
        cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
        array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
        x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
        height, width = (len(y_coords), len(x_coords))
        # Band count inferred from total pixel count / (height * width).
        channels: int = len(array_) // (height * width)
        # TODO make API to get proper 3-D shape directly, or use cuTENSOR
        array_ = array_.reshape(height, width, channels)  # HWC
        array = array_.transpose(2, 0, 1)  # CHW

    dataarray: xr.DataArray = xr.DataArray(
        data=array,
        coords={
            "band": np.arange(channels, dtype=np.uint8),
            "y": y_coords,
            "x": x_coords,
        },
        name=None,
        attrs=None,
    )

    return dataarray.to_dataset(name="raster")

def guess_can_open(self, filename_or_obj):
    """
    Report whether this backend should be tried for the given input.

    Returns True when the path or URL ends in a '.tif' or '.tiff'
    extension (case-insensitive), False otherwise — including for
    non-path objects that ``os.path.splitext`` cannot handle.
    """
    try:
        _, ext = os.path.splitext(filename_or_obj)
    except TypeError:
        # Not a str/os.PathLike (e.g. an open file object) — let other
        # backends claim it.
        return False
    # Lowercase so uppercase extensions such as '.TIF' are recognised too.
    return ext.lower() in {".tif", ".tiff"}
34 changes: 34 additions & 0 deletions cupy_xarray/tests/test_cog3pio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Tests for xarray 'cog3pio' backend engine.
"""

import cupy as cp
import pytest
import xarray as xr

from cupy_xarray.cog3pio import Cog3pioBackendEntrypoint

cog3pio = pytest.importorskip("cog3pio")


def test_entrypoint():
    """Check that the 'cog3pio' engine is registered as an xarray backend."""
    registered_engines = xr.backends.list_engines()
    assert "cog3pio" in registered_engines


def test_xarray_backend_open_dataarray():
    """
    Ensure that passing engine='cog3pio' to xarray.open_dataarray works to read a
    Cloud-optimized GeoTIFF from a http url.
    """
    tci_url = "https://github.com/developmentseed/titiler/raw/1.2.0/src/titiler/mosaic/tests/fixtures/TCI.tif"
    with xr.open_dataarray(
        filename_or_obj=tci_url,
        engine=Cog3pioBackendEntrypoint,
        device_id=0,
    ) as dataarray:
        # Decoded pixels should live in GPU memory as a cupy array.
        assert isinstance(dataarray.data, cp.ndarray)
        # 3 bands of 1098x1098 uint8 pixels.
        assert dataarray.sizes == {"band": 3, "y": 1098, "x": 1098}
        assert dataarray.dtype == "uint8"
        # Spatial extent of the generated x/y coordinates.
        assert dataarray.x.min() == 700010.0
        assert dataarray.x.max() == 809710.0
        assert dataarray.y.min() == 3490250.0
        assert dataarray.y.max() == 3599950.0
16 changes: 16 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,19 @@ Methods

Dataset.cupy.as_cupy
Dataset.cupy.as_numpy


Backends
--------

cog3pio
~~~~~~~

.. currentmodule:: cupy_xarray

.. automodule:: cupy_xarray.cog3pio

.. autosummary::
:toctree: generated/

Cog3pioBackendEntrypoint
5 changes: 3 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@
nb_execution_mode = "off"

intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"dask": ("https://docs.dask.org/en/latest", None),
"cog3pio": ("https://cog3pio.readthedocs.io/en/latest", None),
"cupy": ("https://docs.cupy.dev/en/latest", None),
"dask": ("https://docs.dask.org/en/latest", None),
"python": ("https://docs.python.org/3/", None),
"xarray": ("http://docs.xarray.dev/en/latest/", None),
}

Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ dependencies = [
"xarray>=2024.02.0",
]

[project.entry-points."xarray.backends"]
cog3pio = "cupy_xarray.cog3pio:Cog3pioBackendEntrypoint"

[project.optional-dependencies]
test = [
"dask",
Expand Down