From 19fbcb92bfad0201e40a3e6493d8f5afa92a66d4 Mon Sep 17 00:00:00 2001 From: Sean Date: Mon, 1 Jun 2026 11:08:06 -0400 Subject: [PATCH 1/3] First pass at zarr ZipStore io support --- src/spatialdata/_io/_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index 3be56d67..1c9ec287 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -23,7 +23,7 @@ from upath import UPath from upath.implementations.local import PosixUPath, WindowsUPath from xarray import DataArray, DataTree -from zarr.storage import FsspecStore, LocalStore +from zarr.storage import FsspecStore, LocalStore, ZipStore from spatialdata._core.spatialdata import SpatialData from spatialdata._io.format import RasterFormatType, RasterFormatV01, RasterFormatV02, RasterFormatV03 @@ -498,6 +498,9 @@ def _resolve_zarr_store( # if the input is a local path, use LocalStore return LocalStore(path.path) + if isinstance(path.store, ZipStore): + path = zarr.open_group(store=store, mode='r') + if isinstance(path, zarr.Group): # if the input is a zarr.Group, wrap it with a store if isinstance(path.store, LocalStore): From eb1ea7ea55bdfc6dd202d8eb13f10f2975aef37b Mon Sep 17 00:00:00 2001 From: pennycuda <141153875+pennycuda@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:44:01 -0400 Subject: [PATCH 2/3] allow zipped zarr files as inputs for read_zarr() --- src/spatialdata/_core/spatialdata.py | 5 +++- src/spatialdata/_io/_utils.py | 12 ++++++++-- src/spatialdata/_io/io_points.py | 9 ++++++-- src/spatialdata/_io/io_raster.py | 15 ++++++++---- src/spatialdata/_io/io_shapes.py | 34 ++++++++++++++++++++++++---- src/spatialdata/_io/io_table.py | 16 +++++++++---- src/spatialdata/_io/io_zarr.py | 18 +++++++++++---- 7 files changed, 86 insertions(+), 23 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index fb55ab08..396f10dc 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -7,6 +7,7 @@ from collections.abc import Generator, Mapping from itertools import chain from pathlib import Path +import shutil from typing import TYPE_CHECKING, Any, Literal import pandas as pd @@ -1175,10 +1176,11 @@ def write( if isinstance(file_path, str): file_path = Path(file_path) + self._validate_can_safely_write_to_path(file_path, overwrite=overwrite) + store = _resolve_zarr_store(file_path) self._validate_all_elements() - store = _resolve_zarr_store(file_path) zarr_format = parsed["SpatialData"].zarr_format zarr_group = zarr.create_group(store=store, overwrite=overwrite, zarr_format=zarr_format) self.write_attrs(zarr_group=zarr_group, sdata_format=parsed["SpatialData"]) @@ -1202,6 +1204,7 @@ def write( if consolidate_metadata: self.write_consolidated_metadata() + def _write_element( self, element: SpatialElement | AnnData, diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index 1c9ec287..8822ba61 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -495,11 +495,17 @@ def _resolve_zarr_store( path = UPath(path) if isinstance(path, PosixUPath | WindowsUPath): + # if it is a zipped store, use ZipStore + if path.suffix == '.zip': + store = ZipStore(path.path) + store.root = path.path + return store # if the input is a local path, use LocalStore return LocalStore(path.path) - if isinstance(path.store, ZipStore): - path = zarr.open_group(store=store, mode='r') + if isinstance(path, ZipStore): + path.root =path.path + return path if isinstance(path, zarr.Group): # if the input is a zarr.Group, wrap it with a store @@ -513,6 +519,8 @@ def _resolve_zarr_store( if isinstance(path.store, zarr.storage.ConsolidatedMetadataStore): # if the store is a ConsolidatedMetadataStore, just return the underlying FSSpec store return path.store.store + if isinstance(path.store, ZipStore): + return ZipStore(path.store) raise ValueError(f"Unsupported store type or zarr.Group: {type(path.store)}") if isinstance(path, zarr.storage.StoreLike): # if the input already a store, wrap it in an FSStore diff --git a/src/spatialdata/_io/io_points.py b/src/spatialdata/_io/io_points.py index 03ef3338..107da85f 100644 --- a/src/spatialdata/_io/io_points.py +++ b/src/spatialdata/_io/io_points.py @@ -6,6 +6,7 @@ from dask.dataframe import DataFrame as DaskDataFrame from dask.dataframe import read_parquet from ome_zarr.format import Format +from zarr.core.group import Group as ZarrGroup from spatialdata._io._utils import ( _get_transformations_from_ngff_dict, @@ -21,10 +22,14 @@ def _read_points( - store: str | Path, + store: str | Path | ZarrGroup, ) -> DaskDataFrame: """Read points from a zarr store.""" - f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names + # fix for zipstore + if isinstance(store, ZarrGroup): + f = store + else: + f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names version = _parse_version(f, expect_attrs_key=True) assert version is not None diff --git a/src/spatialdata/_io/io_raster.py b/src/spatialdata/_io/io_raster.py index 2feb7a77..018f481c 100644 --- a/src/spatialdata/_io/io_raster.py +++ b/src/spatialdata/_io/io_raster.py @@ -8,7 +8,7 @@ import numpy as np import zarr from ome_zarr.format import Format -from ome_zarr.io import ZarrLocation +from ome_zarr.io import ZarrLocation, parse_url from ome_zarr.reader import Multiscales, Node, Reader from ome_zarr.types import JSONDict from ome_zarr.writer import _get_valid_axes @@ -160,13 +160,18 @@ def _prepare_storage_options( def _read_multiscale( - store: str | Path, raster_type: Literal["image", "labels"], reader_format: Format + store: str | Path | zarr.storage.ZipStore, raster_type: Literal["image", "labels"], reader_format: Format ) -> DataArray | DataTree: - assert isinstance(store, str | Path) + assert isinstance(store, str | Path | zarr.storage.ZipStore | zarr.Group) assert raster_type in ["image", "labels"] - nodes: list[Node] = [] - image_loc = ZarrLocation(store, fmt=reader_format) + # instantiate an internal subpath for zipstores + internal_subpath = "" + + image_loc = parse_url(store, fmt=reader_format) + + if internal_subpath: + image_loc.internal_subpath = internal_subpath if exists := image_loc.exists(): image_reader = Reader(image_loc)() image_nodes = list(image_reader) diff --git a/src/spatialdata/_io/io_shapes.py b/src/spatialdata/_io/io_shapes.py index 3b6e18e3..2c5669ab 100644 --- a/src/spatialdata/_io/io_shapes.py +++ b/src/spatialdata/_io/io_shapes.py @@ -9,6 +9,7 @@ from natsort import natsorted from ome_zarr.format import Format from shapely import from_ragged_array, to_ragged_array +from zarr.core.group import Group as ZarrGroup from spatialdata._io._utils import ( _get_transformations_from_ngff_dict, @@ -31,10 +32,14 @@ def _read_shapes( - store: str | Path, + store: str | Path | ZarrGroup, ) -> GeoDataFrame: """Read shapes from a zarr store.""" - f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names + # fix for zipstore + if isinstance(store, ZarrGroup): + f = store + else: + f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names version = _parse_version(f, expect_attrs_key=True) assert version is not None shape_format = ShapesFormats[version] @@ -54,9 +59,28 @@ def _read_shapes( geometry = from_ragged_array(typ, coords, offsets) geo_df = GeoDataFrame({"geometry": geometry}, index=index) elif isinstance(shape_format, ShapesFormatV02 | ShapesFormatV03): - store_root = f.store_path.store.root - path = Path(store_root) / f.path / "shapes.parquet" - geo_df = read_parquet(path) + # fix for zipstores + if isinstance(f.store, zarr.storage.ZipStore): + import io + + target_key = f"{f.path}/shapes.parquet" if f.path else "shapes.parquet" + target_key = target_key.strip('/') + if hasattr(f.store, "_zf") and f.store._zf is not None: + parquet_bytes = f.store._zf.read(target_key) + else: + from zarr.core.buffer import default_buffer_prototype + from zarr.core.sync import sync + + buffer_obj = sync(f.store.get(target_key, prototype=default_buffer_prototype())) + parquet_bytes = buffer_obj.to_bytes() if buffer_obj else None + if parquet_bytes is None: + raise FileNotFoundError(f"Could not extract shapes.parquet inside zipped group path: {target_key}") + geo_df = read_parquet(io.BytesIO(parquet_bytes)) + # original method + else: + store_root = f.store_path.store.root + path = Path(store_root) / f.path / "shapes.parquet" + geo_df = read_parquet(path) else: raise ValueError( f"Unsupported shapes format {shape_format} from version {version}. Please update the spatialdata library." diff --git a/src/spatialdata/_io/io_table.py b/src/spatialdata/_io/io_table.py index 3eb4b092..c9376eba 100644 --- a/src/spatialdata/_io/io_table.py +++ b/src/spatialdata/_io/io_table.py @@ -8,6 +8,7 @@ from anndata import read_zarr as read_anndata_zarr from anndata._io.specs import write_elem as write_adata from ome_zarr.format import Format +from zarr.core.group import Group as ZarrGroup from spatialdata._io.format import ( CurrentTablesFormat, @@ -19,15 +20,22 @@ from spatialdata.models import TableModel, get_table_keys -def _read_table(store: str | Path) -> AnnData: - table = read_anndata_zarr(str(store)) +def _read_table(store: str | Path | ZarrGroup) -> AnnData: + # fix for zipstore + if isinstance(store, ZarrGroup): + f = store + table = read_anndata_zarr(f) + else: + table = read_anndata_zarr(str(store)) + f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names - f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names version = _parse_version(f, expect_attrs_key=False) assert version is not None table_format = TablesFormats[version] - f.store.close() + # safely close non zipstores + if not isinstance(store, ZarrGroup): + f.store.close() if isinstance(table_format, TablesFormatV01 | TablesFormatV02): if TableModel.ATTRS_KEY in table.uns: diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py index 4c410fab..a8cd4f67 100644 --- a/src/spatialdata/_io/io_zarr.py +++ b/src/spatialdata/_io/io_zarr.py @@ -32,7 +32,7 @@ def _read_zarr_group_spatialdata_element( root_group: zarr.Group, - root_store_path: str, + root_store_path: str | zarr.storage.Store, sdata_version: Literal["0.1", "0.2"], selector: set[str], read_func: Callable[..., Any], @@ -54,7 +54,12 @@ def _read_zarr_group_spatialdata_element( # skip hidden files like .zgroup or .zmetadata continue elem_group = group[subgroup_name] - elem_group_path = os.path.join(root_store_path, elem_group.path) + # fix for zipstores + if isinstance(root_store_path, zarr.storage.ZipStore): + elem_group_path = elem_group + # original functionality + else: + elem_group_path = os.path.join(root_store_path, elem_group.path) with handle_read_errors( on_bad_files, location=f"{group.path}/{subgroup_name}", @@ -202,9 +207,10 @@ def read_zarr( element_type, element_container, ) in group_readers.items(): + path_or_store = root_group.store if isinstance(root_group.store, zarr.storage.ZipStore) else root_store_path _read_zarr_group_spatialdata_element( root_group=root_group, - root_store_path=root_store_path, + root_store_path=path_or_store, sdata_version=sdata_version, selector=selector, read_func=read_func, @@ -231,7 +237,11 @@ def read_zarr( tables=tables, attrs=attrs, ) - sdata.path = resolved_store.root + # fix for zipstores + if isinstance(resolved_store.root, str): + sdata.path = Path(resolved_store.root) + else: + sdata.path = resolved_store.root return sdata From b6f99829b0350b38697b4066ce4b946d3524ec75 Mon Sep 17 00:00:00 2001 From: pennycuda <141153875+pennycuda@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:37:44 -0400 Subject: [PATCH 3/3] run pre-commit --- src/spatialdata/_core/spatialdata.py | 2 -- src/spatialdata/_io/_utils.py | 4 ++-- src/spatialdata/_io/io_points.py | 7 +++---- src/spatialdata/_io/io_raster.py | 2 +- src/spatialdata/_io/io_shapes.py | 9 ++++----- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index 396f10dc..5ca3065c 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -7,7 +7,6 @@ from collections.abc import Generator, Mapping from itertools import chain from pathlib import Path -import shutil from typing import TYPE_CHECKING, Any, Literal import pandas as pd @@ -1204,7 +1203,6 @@ def write( if consolidate_metadata: self.write_consolidated_metadata() - def _write_element( self, element: SpatialElement | AnnData, diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index 8822ba61..4cf9c938 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -496,7 +496,7 @@ def _resolve_zarr_store( if isinstance(path, PosixUPath | WindowsUPath): # if it is a zipped store, use ZipStore - if path.suffix == '.zip': + if path.suffix == ".zip": store = ZipStore(path.path) store.root = path.path return store @@ -504,7 +504,7 @@ def _resolve_zarr_store( return LocalStore(path.path) if isinstance(path, ZipStore): - path.root =path.path + path.root = path.path return path if isinstance(path, zarr.Group): diff --git a/src/spatialdata/_io/io_points.py b/src/spatialdata/_io/io_points.py index 107da85f..c56ac41e 100644 --- a/src/spatialdata/_io/io_points.py +++ b/src/spatialdata/_io/io_points.py @@ -26,10 +26,9 @@ def _read_points( ) -> DaskDataFrame: """Read points from a zarr store.""" # fix for zipstore - if isinstance(store, ZarrGroup): - f = store - else: - f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names + f = ( + store if isinstance(store, ZarrGroup) else zarr.open(Path(store), mode="r") + ) # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names version = _parse_version(f, expect_attrs_key=True) assert version is not None diff --git a/src/spatialdata/_io/io_raster.py b/src/spatialdata/_io/io_raster.py index 018f481c..15908d17 100644 --- a/src/spatialdata/_io/io_raster.py +++ b/src/spatialdata/_io/io_raster.py @@ -8,7 +8,7 @@ import numpy as np import zarr from ome_zarr.format import Format -from ome_zarr.io import ZarrLocation, parse_url +from ome_zarr.io import parse_url from ome_zarr.reader import Multiscales, Node, Reader from ome_zarr.types import JSONDict from ome_zarr.writer import _get_valid_axes diff --git a/src/spatialdata/_io/io_shapes.py b/src/spatialdata/_io/io_shapes.py index 2c5669ab..73736c6d 100644 --- a/src/spatialdata/_io/io_shapes.py +++ b/src/spatialdata/_io/io_shapes.py @@ -36,10 +36,9 @@ def _read_shapes( ) -> GeoDataFrame: """Read shapes from a zarr store.""" # fix for zipstore - if isinstance(store, ZarrGroup): - f = store - else: - f = zarr.open(Path(store), mode="r") # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names + f = ( + store if isinstance(store, ZarrGroup) else zarr.open(Path(store), mode="r") + ) # Path avoids zarr v3 URL-parsing special chars (e.g. #) in names version = _parse_version(f, expect_attrs_key=True) assert version is not None shape_format = ShapesFormats[version] @@ -64,7 +63,7 @@ def _read_shapes( import io target_key = f"{f.path}/shapes.parquet" if f.path else "shapes.parquet" - target_key = target_key.strip('/') + target_key = target_key.strip("/") if hasattr(f.store, "_zf") and f.store._zf is not None: parquet_bytes = f.store._zf.read(target_key) else: