Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions src/mdio/builder/templates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from mdio.builder.schemas.v1.units import AllUnitModel
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.schemas.v1.variable import VariableMetadata
from mdio.builder.templates.types import CoordinateSpec

if TYPE_CHECKING:
from mdio.builder.schemas.v1.dataset import Dataset
Expand All @@ -43,6 +44,13 @@ def __init__(self, data_domain: SeismicDataDomain) -> None:
self._physical_coord_names: tuple[str, ...] = ()
self._logical_coord_names: tuple[str, ...] = ()
self._var_chunk_shape: tuple[int, ...] = ()
self.synthesize_missing_dims: tuple[str, ...] = ()

# TEMPORARY (removed with declare_coordinate_specs): set when grid overrides mutate this
# template in-place (dims collapsed into 'trace', extra coordinates added). Once mutated,
# the runtime layout intentionally diverges from the static declare_coordinate_specs()
# contract, so the drift guard in build_dataset() must not run.
self._grid_overrides_applied: bool = False

self._builder: MDIODatasetBuilder | None = None
self._dim_sizes: tuple[int, ...] = ()
Expand All @@ -67,6 +75,45 @@ def _repr_html_(self) -> str:
"""Return an HTML representation of the template for Jupyter notebooks."""
return template_repr_html(self)

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the default non-dimension coordinate specs (name, dims, dtype) of this template.

    Per the ingestion design, the ``SchemaResolver`` consumes these specs to decide which
    trace-header fields to read and how to rewrite coordinate dimensions under grid overrides.

    The default emits one spec per physical coordinate name (``FLOAT64``) followed by one
    spec per logical coordinate name (``INT32``, except ``"gun"`` which is ``UINT8``). Every
    spec spans **all** spatial dimensions. Subclasses whose coordinates span only a subset of
    the spatial dimensions, or use a different dtype, must override this method; otherwise
    ``build_dataset`` raises.

    .. note::
        TEMPORARY (to be removed before the next minor release): these specs duplicate the
        non-dimension coordinates created in :meth:`_add_coordinates`, and
        :meth:`build_dataset` checks that the two agree (see
        :meth:`_validate_declared_coordinate_specs`). Once datasets are built directly from
        the resolved schema, ``_add_coordinates`` will be derived from these specs and the
        duplication will disappear.

    Returns:
        The declared non-dimension coordinate specs, physical coordinates first.
    """
    declared = []

    # Physical coordinates are always declared as 64-bit floats.
    for physical_name in self.physical_coordinate_names:
        declared.append(
            CoordinateSpec(
                name=physical_name,
                dimensions=self.spatial_dimension_names,
                dtype=ScalarType.FLOAT64,
            )
        )

    # Logical coordinates are 32-bit ints, with "gun" special-cased to an unsigned byte.
    for logical_name in self.logical_coordinate_names:
        declared.append(
            CoordinateSpec(
                name=logical_name,
                dimensions=self.spatial_dimension_names,
                dtype=ScalarType.UINT8 if logical_name == "gun" else ScalarType.INT32,
            )
        )

    return tuple(declared)

def build_dataset(
self,
name: str,
Expand Down Expand Up @@ -107,6 +154,10 @@ def build_dataset(
except ValueError as exc: # coordinate may already exist
if "same name twice" not in str(exc):
raise
# Skip the static drift guard when grid overrides have transformed the template: the
# runtime layout no longer matches the declared (override-free) specs by design.
if not self._grid_overrides_applied:
self._validate_declared_coordinate_specs()
self._add_variables()
self._add_trace_mask()

Expand All @@ -123,6 +174,80 @@ def add_units(self, units: dict[str, AllUnitModel]) -> None:
raise ValueError(msg)
self._units |= units

def apply_resolved_dimensions(
    self,
    dim_names: tuple[str, ...],
    chunk_shape: tuple[int, ...],
) -> None:
    """Replace the template's dimension names and chunk shape with resolved values.

    This is the supported way for the ingestion pipeline to push the final dimension
    layout back into the template after the SchemaResolver has applied grid overrides
    (e.g. NonBinned, HasDuplicates), rather than writing to private attributes directly.

    Args:
        dim_names: Final ordered dimension names.
        chunk_shape: Chunk shape; must have one entry per name in ``dim_names``.

    Raises:
        ValueError: If ``len(chunk_shape) != len(dim_names)``. Nothing is modified
            in that case.
    """
    n_chunks, n_dims = len(chunk_shape), len(dim_names)
    if n_chunks == n_dims:
        # Store as tuples regardless of the sequence type the caller handed in.
        self._dim_names = tuple(dim_names)
        self._var_chunk_shape = tuple(chunk_shape)
        return

    msg = f"chunk_shape length {n_chunks} does not match dim_names length {n_dims}"
    raise ValueError(msg)

def _validate_declared_coordinate_specs(self) -> None:
"""Fail the build if :meth:`declare_coordinate_specs` drifted from the built coordinates.

TEMPORARY (to be removed before the next minor release): while
:meth:`declare_coordinate_specs` duplicates the non-dimension coordinates created in
:meth:`_add_coordinates`, this guard ensures the two never diverge in name, dimensions,
or dtype. The ingestion ``SchemaResolver`` trusts the declared specs, so silent drift
would corrupt resolved schemas. The check runs for every template (built-in and
user-defined) on every ``build_dataset`` call that does not apply grid overrides. Grid
overrides mutate the template in-place (collapsing dims into ``trace`` and adding
coordinates), so the runtime layout intentionally diverges from the declared specs and
the guard is skipped for those builds. It is removed once ``_add_coordinates`` is derived
from the resolved schema and the duplication no longer exists.

Raises:
ValueError: If the declared specs do not match the built non-dimension coordinates.
"""
dim_names = set(self._dim_names)
built = {coord.name: coord for coord in self._builder._coordinates if coord.name not in dim_names}
declared = {spec.name: spec for spec in self.declare_coordinate_specs()}

if set(declared) != set(built):
built_only = sorted(set(built) - set(declared))
declared_only = sorted(set(declared) - set(built))
msg = (
f"declare_coordinate_specs() for template {self.name!r} is out of sync with the "
f"coordinates built by _add_coordinates(). Built but not declared: {built_only}. "
f"Declared but not built: {declared_only}. Override declare_coordinate_specs() so "
f"it matches the non-dimension coordinates this template creates."
)
raise ValueError(msg)

for coord_name, spec in declared.items():
coord = built[coord_name]
built_dims = tuple(dim.name for dim in coord.dimensions)
if built_dims != spec.dimensions:
msg = (
f"declare_coordinate_specs() for template {self.name!r} declares coordinate "
f"{coord_name!r} over dimensions {spec.dimensions}, but _add_coordinates() built "
f"it over {built_dims}."
)
raise ValueError(msg)
if coord.data_type != spec.dtype:
msg = (
f"declare_coordinate_specs() for template {self.name!r} declares coordinate "
f"{coord_name!r} as {spec.dtype}, but _add_coordinates() built it as "
f"{coord.data_type}."
)
raise ValueError(msg)

@property
def name(self) -> str:
"""Returns the name of the template."""
Expand Down
8 changes: 8 additions & 0 deletions src/mdio/builder/templates/seismic_2d_cdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CdpGatherDomain
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand Down Expand Up @@ -35,6 +36,13 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "2D", "gatherType": "cdp"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the ``cdp_x`` / ``cdp_y`` specs, both FLOAT64 over the ``cdp`` dimension."""
    cdp_dim = ("cdp",)
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=cdp_dim, dtype=ScalarType.FLOAT64)
        for coord_name in ("cdp_x", "cdp_y")
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
self._builder.add_coordinate(
Expand Down
12 changes: 12 additions & 0 deletions src/mdio/builder/templates/seismic_2d_streamer_shot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand All @@ -26,6 +27,17 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "2D", "gatherType": "common_source"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the source and group coordinate specs for the 2D streamer shot template.

    Source coordinates span ``shot_point`` only; group coordinates span
    ``(shot_point, channel)``. All four are FLOAT64.
    """
    per_shot = ("shot_point",)
    per_receiver = ("shot_point", "channel")
    # (coordinate name, dimensions) in declaration order.
    layout = (
        ("source_coord_x", per_shot),
        ("source_coord_y", per_shot),
        ("group_coord_x", per_receiver),
        ("group_coord_y", per_receiver),
    )
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=coord_dims, dtype=ScalarType.FLOAT64)
        for coord_name, coord_dims in layout
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
for name in self._dim_names:
Expand Down
8 changes: 8 additions & 0 deletions src/mdio/builder/templates/seismic_3d_cdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CdpGatherDomain
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand Down Expand Up @@ -35,6 +36,13 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "3D", "gatherType": "cdp"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the ``cdp_x`` / ``cdp_y`` specs, both FLOAT64 over ``(inline, crossline)``."""
    grid_dims = ("inline", "crossline")
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=grid_dims, dtype=ScalarType.FLOAT64)
        for coord_name in ("cdp_x", "cdp_y")
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
self._builder.add_coordinate(
Expand Down
8 changes: 8 additions & 0 deletions src/mdio/builder/templates/seismic_3d_coca.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand All @@ -26,6 +27,13 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "3D", "gatherType": "common_offset_common_azimuth"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the ``cdp_x`` / ``cdp_y`` specs, both FLOAT64 over ``(inline, crossline)``."""
    grid_dims = ("inline", "crossline")
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=grid_dims, dtype=ScalarType.FLOAT64)
        for coord_name in ("cdp_x", "cdp_y")
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
self._builder.add_coordinate(
Expand Down
14 changes: 14 additions & 0 deletions src/mdio/builder/templates/seismic_3d_obn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand Down Expand Up @@ -49,6 +50,19 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "3D", "gatherType": "common_receiver"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the receiver- and shot-indexed coordinate specs for the 3D OBN template.

    Group coordinates span ``receiver``; the remaining coordinates span
    ``(shot_line, gun, shot_index)``. X/Y coordinates are FLOAT64, while ``shot_point``
    and ``orig_field_record_num`` are UINT32.
    """
    per_receiver = ("receiver",)
    per_shot = ("shot_line", "gun", "shot_index")
    # (coordinate name, dimensions, dtype) in declaration order.
    layout = (
        ("group_coord_x", per_receiver, ScalarType.FLOAT64),
        ("group_coord_y", per_receiver, ScalarType.FLOAT64),
        ("shot_point", per_shot, ScalarType.UINT32),
        ("orig_field_record_num", per_shot, ScalarType.UINT32),
        ("source_coord_x", per_shot, ScalarType.FLOAT64),
        ("source_coord_y", per_shot, ScalarType.FLOAT64),
    )
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=coord_dims, dtype=coord_dtype)
        for coord_name, coord_dims, coord_dtype in layout
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
# EXCLUDE: `shot_index` since it's 0-N (calculated dimension)
Expand Down
8 changes: 8 additions & 0 deletions src/mdio/builder/templates/seismic_3d_offset_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand Down Expand Up @@ -33,6 +34,13 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "3D", "gatherType": "offset_tiles"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the ``cdp_x`` / ``cdp_y`` specs, both FLOAT64 over ``(inline, crossline)``."""
    grid_dims = ("inline", "crossline")
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=grid_dims, dtype=ScalarType.FLOAT64)
        for coord_name in ("cdp_x", "cdp_y")
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
self._builder.add_coordinate(
Expand Down
13 changes: 13 additions & 0 deletions src/mdio/builder/templates/seismic_3d_receiver_gathers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec


class Seismic3DReceiverGathersTemplate(AbstractDatasetTemplate):
Expand All @@ -32,6 +33,18 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "3D", "gatherType": "receiver_gathers"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the receiver- and shot-indexed coordinate specs for the 3D receiver gathers template.

    Receiver coordinates span ``receiver``; the remaining coordinates span
    ``(shot_line, shot_index)``. X/Y coordinates are FLOAT64 and ``shot_point`` is UINT32.
    """
    per_receiver = ("receiver",)
    per_shot = ("shot_line", "shot_index")
    # (coordinate name, dimensions, dtype) in declaration order.
    layout = (
        ("receiver_x", per_receiver, ScalarType.FLOAT64),
        ("receiver_y", per_receiver, ScalarType.FLOAT64),
        ("shot_point", per_shot, ScalarType.UINT32),
        ("source_coord_x", per_shot, ScalarType.FLOAT64),
        ("source_coord_y", per_shot, ScalarType.FLOAT64),
    )
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=coord_dims, dtype=coord_dtype)
        for coord_name, coord_dims, coord_dtype in layout
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
# Note: shot_index is calculated (0-N), so we don't add a coordinate for it
Expand Down
13 changes: 13 additions & 0 deletions src/mdio/builder/templates/seismic_3d_shot_receiver_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand Down Expand Up @@ -32,6 +33,18 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyType": "3D", "gatherType": "common_source"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the source- and group-indexed coordinate specs for the 3D shot/receiver-line template.

    Source coordinates and ``orig_field_record_num`` span ``(shot_line, shot_point)``;
    group coordinates span ``(receiver_line, receiver)``. X/Y coordinates are FLOAT64 and
    ``orig_field_record_num`` is UINT32.
    """
    per_source = ("shot_line", "shot_point")
    per_group = ("receiver_line", "receiver")
    # (coordinate name, dimensions, dtype) in declaration order.
    layout = (
        ("source_coord_x", per_source, ScalarType.FLOAT64),
        ("source_coord_y", per_source, ScalarType.FLOAT64),
        ("group_coord_x", per_group, ScalarType.FLOAT64),
        ("group_coord_y", per_group, ScalarType.FLOAT64),
        ("orig_field_record_num", per_source, ScalarType.UINT32),
    )
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=coord_dims, dtype=coord_dtype)
        for coord_name, coord_dims, coord_dtype in layout
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
self._builder.add_coordinate(
Expand Down
14 changes: 14 additions & 0 deletions src/mdio/builder/templates/seismic_3d_streamer_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from mdio.builder.schemas.dtype import ScalarType
from mdio.builder.schemas.v1.variable import CoordinateMetadata
from mdio.builder.templates.base import AbstractDatasetTemplate
from mdio.builder.templates.types import CoordinateSpec
from mdio.builder.templates.types import SeismicDataDomain


Expand Down Expand Up @@ -38,6 +39,19 @@ def _name(self) -> str:
def _load_dataset_attributes(self) -> dict[str, Any]:
return {"surveyDimensionality": "3D", "gatherType": "common_source"}

def declare_coordinate_specs(self) -> tuple[CoordinateSpec, ...]:
    """Return the shot- and receiver-indexed coordinate specs for the 3D streamer field template.

    Shot-level coordinates span ``(sail_line, gun, shot_index)``; group coordinates span
    ``(sail_line, gun, shot_index, cable, channel)``. X/Y coordinates are FLOAT64, while
    ``orig_field_record_num`` and ``shot_point`` are UINT32.
    """
    per_shot = ("sail_line", "gun", "shot_index")
    per_receiver = ("sail_line", "gun", "shot_index", "cable", "channel")
    # (coordinate name, dimensions, dtype) in declaration order.
    layout = (
        ("orig_field_record_num", per_shot, ScalarType.UINT32),
        ("shot_point", per_shot, ScalarType.UINT32),
        ("source_coord_x", per_shot, ScalarType.FLOAT64),
        ("source_coord_y", per_shot, ScalarType.FLOAT64),
        ("group_coord_x", per_receiver, ScalarType.FLOAT64),
        ("group_coord_y", per_receiver, ScalarType.FLOAT64),
    )
    return tuple(
        CoordinateSpec(name=coord_name, dimensions=coord_dims, dtype=coord_dtype)
        for coord_name, coord_dims, coord_dtype in layout
    )

def _add_coordinates(self) -> None:
# Add dimension coordinates
# EXCLUDE: `shot_index` since it's 0-N
Expand Down
Loading
Loading