Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ classifiers = [
]

dependencies = [
"arraybridge>=0.2.9",
"numpy>=1.26.0",
"portalocker>=2.8.0", # Cross-platform file locking
"metaclass-registry",
"imageio>=2.37.0",
"zarr>=2.18.0,<3.0", # Required for ZarrStorageBackend
"ome-zarr>=0.11.0", # Required for OME-ZARR HCS compliance
]
Expand Down Expand Up @@ -197,4 +199,4 @@ ignore = [
]

[tool.ruff.per-file-ignores]
"__init__.py" = ["F401"] # unused imports
"__init__.py" = ["F401"] # unused imports
6 changes: 4 additions & 2 deletions src/polystore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
get_backend,
)
from .constants import Backend, MemoryType, TransportMode
from .disk import DiskStorageBackend
from .disk import DiskBackend, DiskStorageBackend
from .filemanager import FileManager
from .formats import FileFormat, DEFAULT_IMAGE_EXTENSIONS
from .memory import MemoryStorageBackend
from .memory import MemoryBackend, MemoryStorageBackend
from .metadata_writer import (
AtomicMetadataWriter,
MetadataWriteError,
Expand Down Expand Up @@ -76,7 +76,9 @@
"register_cleanup_callback",
"STORAGE_BACKENDS",
"DiskStorageBackend",
"DiskBackend",
"MemoryStorageBackend",
"MemoryBackend",
"FileManager",
"file_lock",
"atomic_write_json",
Expand Down
3 changes: 1 addition & 2 deletions src/polystore/backend_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def create_storage_registry() -> Dict[str, DataSink]:

# Backends that require context-specific initialization (e.g., plate_root)
# These are registered lazily when needed, not at startup
SKIP_BACKENDS = {'virtual_workspace', 'omero_local'}
SKIP_BACKENDS = {'virtual_workspace', 'omero_local', 'bioformats'}

registry = {}
for backend_type in STORAGE_BACKENDS.keys():
Expand Down Expand Up @@ -157,4 +157,3 @@ def cleanup_all_backends() -> None:

_backend_instances.clear()
logger.info("All backend instances cleaned up")

13 changes: 7 additions & 6 deletions src/polystore/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,15 +546,16 @@ def reset_memory_backend() -> None:
# Clear files from existing memory backend while preserving directories
memory_backend = storage_registry[Backend.MEMORY.value]

# DEBUG: Log what's in memory before clearing
existing_keys = list(memory_backend._memory_store.keys())
logger.info(f"🔍 VFS_CLEAR: Memory backend has {len(existing_keys)} entries BEFORE clear")
logger.info(f"🔍 VFS_CLEAR: First 10 keys: {existing_keys[:10]}")
logger.debug("Memory backend has %s entries before clear", len(existing_keys))
logger.debug("First memory backend keys before clear: %s", existing_keys[:10])

memory_backend.clear_files_only()

# DEBUG: Log what's in memory after clearing
remaining_keys = list(memory_backend._memory_store.keys())
logger.info(f"🔍 VFS_CLEAR: Memory backend has {len(remaining_keys)} entries AFTER clear (directories only)")
logger.info(f"🔍 VFS_CLEAR: First 10 remaining keys: {remaining_keys[:10]}")
logger.debug(
"Memory backend has %s entries after clear (directories only)",
len(remaining_keys),
)
logger.debug("First memory backend keys after clear: %s", remaining_keys[:10])
logger.info("Memory backend reset - files cleared, directories preserved")
223 changes: 223 additions & 0 deletions src/polystore/bioformats_java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
"""Shared Java Bio-Formats bridge for metadata discovery and plane loading."""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from threading import Lock
from typing import Any, Callable

import numpy as np


class BioFormatsJavaUnavailableError(RuntimeError):
    """Raised when the Java Bio-Formats runtime cannot be initialized.

    ``BioFormatsJavaContext`` raises it both when the optional ``imagej`` /
    ``scyjava`` packages cannot be imported and when starting Fiji or
    importing the ``loci.formats`` classes fails. The underlying error is
    chained as ``__cause__``.
    """


@dataclass(frozen=True, slots=True)
class BioFormatsOpenedReader:
"""Open Bio-Formats reader plus its OME metadata store."""

reader: Any
metadata: Any

def close(self) -> None:
self.reader.close()


class BioFormatsJavaContext:
    """Lazy JVM/ImageJ context for Bio-Formats Java access.

    The shared instance is obtained through :meth:`instance`. Creating it
    only imports ``imagej`` and ``scyjava``; the Fiji gateway and the
    ``loci.formats`` classes are loaded on the first call to
    :meth:`ensure_initialized`, which :meth:`open_reader` triggers.
    """

    # Guards creation of the shared instance.
    _lock = Lock()
    # Serializes the one-time Fiji/Bio-Formats start-up.
    _init_lock = Lock()
    _instance: "BioFormatsJavaContext | None" = None

    def __init__(self, imagej_module: Any, scyjava_module: Any):
        """Store the imported modules; nothing Java-side is started here."""
        self.imagej = imagej_module
        self.scyjava = scyjava_module
        # ``ij`` doubles as the "fully initialized" flag (see
        # ensure_initialized); the three class handles are set before it.
        self.ij = None
        self.ImageReader = None
        self.MetadataTools = None
        self.FormatTools = None

    @classmethod
    def instance(cls) -> "BioFormatsJavaContext":
        """Return the shared context, creating it on first use.

        Raises:
            BioFormatsJavaUnavailableError: if ``imagej`` or ``scyjava``
                cannot be imported.
        """
        with cls._lock:
            if cls._instance is None:
                cls._instance = cls._create()
            return cls._instance

    @classmethod
    def _create(cls) -> "BioFormatsJavaContext":
        """Import the optional Python bridges and wrap them in a context."""
        try:
            import imagej
            import scyjava
        except ImportError as exc:
            raise BioFormatsJavaUnavailableError(
                "Bio-Formats support requires the optional bioformats/fiji dependencies."
            ) from exc
        return cls(imagej, scyjava)

    def ensure_initialized(self) -> None:
        """Start Fiji headless and import the Bio-Formats classes once.

        The results are built in locals and published only after every step
        succeeded, with ``ij`` assigned last. A failure therefore leaves the
        context untouched, so a later call retries instead of returning
        early with ``ImageReader``/``MetadataTools`` still ``None``.

        Raises:
            BioFormatsJavaUnavailableError: if Fiji cannot be started or a
                ``loci.formats`` class cannot be imported.
        """
        if self.ij is not None:
            return
        with self._init_lock:
            # Another thread may have finished while this one waited.
            if self.ij is not None:
                return
            try:
                gateway = self.imagej.init("sc.fiji:fiji", mode="headless")
                image_reader = self.scyjava.jimport("loci.formats.ImageReader")
                metadata_tools = self.scyjava.jimport("loci.formats.MetadataTools")
                format_tools = self.scyjava.jimport("loci.formats.FormatTools")
            except Exception as exc:
                raise BioFormatsJavaUnavailableError(
                    "Could not initialize Fiji/Bio-Formats through pyimagej."
                ) from exc
            self.ImageReader = image_reader
            self.MetadataTools = metadata_tools
            self.FormatTools = format_tools
            # Published last: a non-None ``ij`` means the handles above are set.
            self.ij = gateway

    def open_reader(self, source_path: str | Path) -> BioFormatsOpenedReader:
        """Open ``source_path`` with a new reader and OME-XML metadata store.

        The caller owns the returned object and must close it. If attaching
        the metadata store or ``setId`` fails, the reader is closed here and
        the exception is re-raised.

        Raises:
            BioFormatsJavaUnavailableError: if the Java side cannot be
                initialized.
        """
        self.ensure_initialized()
        metadata = self.MetadataTools.createOMEXMLMetadata()
        reader = self.ImageReader()
        try:
            reader.setMetadataStore(metadata)
            reader.setId(str(source_path))
            return BioFormatsOpenedReader(reader=reader, metadata=metadata)
        except Exception:
            reader.close()
            raise


def java_int(value: Any) -> int | None:
    """Return ``value`` as a Python ``int``, or ``None`` when it is ``None``.

    Wrapper objects are first unwrapped with the readers of
    ``JAVA_SCALAR_PROJECTOR``; the unwrapped value is then passed to ``int``.
    """
    scalar = OptionalJavaScalar.from_java(value, JAVA_SCALAR_PROJECTOR.readers)
    return scalar.convert(int)


def java_float(value: Any) -> float | None:
    """Return ``value`` as a Python ``float``, or ``None`` when it is ``None``.

    Wrapper objects are first unwrapped with the readers of
    ``JAVA_SCALAR_PROJECTOR``; the unwrapped value is then passed to ``float``.
    """
    scalar = OptionalJavaScalar.from_java(value, JAVA_SCALAR_PROJECTOR.readers)
    return scalar.convert(float)


def java_str(value: Any) -> str | None:
    """Return ``str(value)``, passing ``None`` through unchanged."""
    return None if value is None else str(value)


def _read_java_value(value: Any) -> Any:
    """Unwrap ``value`` by calling its ``value()`` method.

    Objects without a ``value`` attribute raise ``AttributeError``, which
    ``JavaScalarProjector.unwrap`` uses as the signal to try the next reader.
    """
    return value.value()


def _read_java_get_value(value: Any) -> Any:
    """Unwrap ``value`` by calling its ``getValue()`` method.

    Objects without a ``getValue`` attribute raise ``AttributeError``, which
    ``JavaScalarProjector.unwrap`` uses as the signal to try the next reader.
    """
    return value.getValue()


@dataclass(frozen=True, slots=True)
class JavaScalarProjector:
"""Project nullable Java scalar wrappers to Python scalar values."""

readers: tuple[Callable[[Any], Any], ...]

def unwrap(self, value: Any) -> Any:
for reader in self.readers:
try:
return reader(value)
except AttributeError:
continue
return value


@dataclass(frozen=True, slots=True)
class OptionalJavaScalar:
"""Nullable Java scalar after wrapper unwrapping."""

value: Any | None

@classmethod
def from_java(
cls,
value: Any,
readers: tuple[Callable[[Any], Any], ...],
) -> "OptionalJavaScalar":
if value is None:
return cls(None)
return cls(JavaScalarProjector(readers).unwrap(value))

def convert(self, converter: Callable[[Any], Any]) -> Any | None:
if self.value is None:
return None
return converter(self.value)


# Module-wide projector used by java_int/java_float. Readers are tried in the
# order listed: ``value.value()`` first, then ``value.getValue()``.
JAVA_SCALAR_PROJECTOR = JavaScalarProjector(
    readers=(
        _read_java_value,
        _read_java_get_value,
    )
)


def load_bioformats_plane(
    *,
    source_path: Path,
    series_index: int,
    plane_index: int,
) -> np.ndarray:
    """Load a single 2D Bio-Formats plane through the Java ImageReader.

    A reader is opened for ``source_path``, positioned on ``series_index``,
    and closed again before returning, whether or not loading succeeded.

    Args:
        source_path: File handed to the Bio-Formats reader.
        series_index: Series selected with ``reader.setSeries``.
        plane_index: Plane passed to ``reader.openBytes``.

    Returns:
        A writable array of shape ``(sizeY, sizeX)`` whose dtype follows the
        reader's pixel type and byte order.

    Raises:
        ValueError: if the series reports an RGB channel count other than 1,
            or the pixel type has no NumPy mapping.
        BioFormatsJavaUnavailableError: if the Java runtime cannot be started.
    """
    context = BioFormatsJavaContext.instance()
    opened = context.open_reader(source_path)
    reader = opened.reader
    try:
        reader.setSeries(series_index)
        if reader.getRGBChannelCount() != 1:
            raise ValueError(
                "Bio-Formats RGB/interleaved planes are not yet representable as "
                "OpenHCS scalar channel planes."
            )
        raw = bytes(reader.openBytes(plane_index))
        dtype = PixelDtypeCatalog.from_format_tools(context.FormatTools).dtype(
            pixel_type=int(reader.getPixelType()),
            little_endian=bool(reader.isLittleEndian()),
        )
        array = np.frombuffer(raw, dtype=dtype)
        plane = array.reshape((int(reader.getSizeY()), int(reader.getSizeX())))
        # np.frombuffer over immutable ``bytes`` yields a read-only view;
        # copy so callers receive a writable array they can modify in place.
        return plane.copy()
    finally:
        opened.close()


@dataclass(frozen=True, slots=True)
class PixelDtypeSpec:
"""NumPy dtype projection for one Bio-Formats pixel type."""

key: int
dtype_code: str
endian_sensitive: bool = True

def dtype(self, *, little_endian: bool) -> np.dtype:
if not self.endian_sensitive:
return np.dtype(self.dtype_code)
endian = "<" if little_endian else ">"
return np.dtype(endian + self.dtype_code)


@dataclass(frozen=True, slots=True)
class PixelDtypeCatalog:
"""Authoritative Bio-Formats pixel-type to NumPy dtype mapping."""

specs_by_key: dict[int, PixelDtypeSpec]

@classmethod
def from_format_tools(cls, format_tools: Any) -> "PixelDtypeCatalog":
specs = (
PixelDtypeSpec(int(format_tools.INT8), "i1", endian_sensitive=False),
PixelDtypeSpec(int(format_tools.UINT8), "u1", endian_sensitive=False),
PixelDtypeSpec(int(format_tools.INT16), "i2"),
PixelDtypeSpec(int(format_tools.UINT16), "u2"),
PixelDtypeSpec(int(format_tools.INT32), "i4"),
PixelDtypeSpec(int(format_tools.UINT32), "u4"),
PixelDtypeSpec(int(format_tools.FLOAT), "f4"),
PixelDtypeSpec(int(format_tools.DOUBLE), "f8"),
)
return cls({spec.key: spec for spec in specs})

def dtype(self, *, pixel_type: int, little_endian: bool) -> np.dtype:
try:
return self.specs_by_key[pixel_type].dtype(little_endian=little_endian)
except KeyError as exc:
raise ValueError(f"Unsupported Bio-Formats pixel type: {pixel_type}") from exc
Loading
Loading