From 759b8c633c953e18da0cce6bc83aaae6ba29e1b4 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 3 Jun 2026 09:09:56 +0200 Subject: [PATCH 1/5] refactor: move conversion/broadcasting/alignment code to linopy/alignment.py Pure move, no behavior change. The new module owns the seam between user input and linopy's labelled arrays: - coords parsing: _coords_to_dict, _as_index, _as_multiindex - conversion: get_from_iterable, pandas_to_dataarray, numpy_to_dataarray, _named_pandas_to_dataarray, fill_missing_coords, as_dataarray - MultiIndex projection: _LevelProjection, _project_onto_multiindex_levels, _warn_implicit_projections - broadcasting: _broadcast_to_coords, broadcast_to_coords - validation: validate_alignment - symmetric alignment: align common.py keeps the general utilities (formatting, label indexes, polars helpers, decorators). Importers (model, expressions, variables, __init__, tests) updated; no re-exports. Follow-up requested by @FabianHofmann in #732. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- linopy/__init__.py | 2 +- linopy/alignment.py | 910 ++++++++++++++++++++++++++++++++++++++++++ linopy/common.py | 880 +--------------------------------------- linopy/expressions.py | 6 +- linopy/model.py | 3 +- linopy/variables.py | 3 +- test/test_common.py | 10 +- 7 files changed, 927 insertions(+), 887 deletions(-) create mode 100644 linopy/alignment.py diff --git a/linopy/__init__.py b/linopy/__init__.py index e80e615d..b813f71d 100644 --- a/linopy/__init__.py +++ b/linopy/__init__.py @@ -12,7 +12,7 @@ # Note: For intercepting multiplications between xarray dataarrays, Variables and Expressions # we need to extend their __mul__ functions with a quick special case import linopy.monkey_patch_xarray # noqa: F401 -from linopy.common import align +from linopy.alignment import align from linopy.config import options from linopy.constants import ( EQUAL, diff --git a/linopy/alignment.py b/linopy/alignment.py new file mode 100644 index 00000000..d5f65172 --- /dev/null +++ b/linopy/alignment.py @@ -0,0 +1,910 @@ +#!/usr/bin/env python3 +""" +Conversion, broadcasting, and alignment of user input against coordinates. + +This module owns the seam between what users pass (scalars, numpy arrays, +pandas / polars objects, DataArrays) and what linopy stores (labelled +DataArrays conforming to a model's coordinates): + +- :func:`as_dataarray` — convert only (type dispatch + positional labeling). +- :func:`broadcast_to_coords` — convert and broadcast against ``coords``; + ``strict=True`` (default) raises on any mismatch, ``strict=False`` passes + mismatches through for downstream xarray alignment. +- :func:`validate_alignment` — the validation primitive behind the strict mode. +- :func:`align` — the symmetric counterpart, wrapping :func:`xarray.align` + for any number of linopy objects. + +Terminology for stacked MultiIndex dimensions: a dim has *levels* (its +component index names, e.g. 
``period`` / ``timestep``) and *level +combinations* (its elements — one tuple per position, e.g. ``(2030, 't1')``). +""" + +from __future__ import annotations + +from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence +from functools import partial +from typing import TYPE_CHECKING, Any, Literal, NamedTuple, overload +from warnings import warn + +import numpy as np +import pandas as pd +import polars as pl +from numpy import arange +from xarray import Coordinates, DataArray, Dataset, broadcast +from xarray import align as xr_align +from xarray.core import dtypes +from xarray.core.coordinates import CoordinateValidationError +from xarray.core.types import JoinOptions, T_Alignable +from xarray.namedarray.utils import is_dict_like + +from linopy.constants import ( + HELPER_DIMS, + EvolvingAPIWarning, +) +from linopy.types import CoordsLike, DimsLike + +if TYPE_CHECKING: + from linopy.expressions import LinearExpression, QuadraticExpression + from linopy.variables import Variable + + +def _coords_to_dict( + coords: Sequence[Sequence | pd.Index] | Mapping, + dims: DimsLike | None = None, +) -> dict[Hashable, Any]: + """ + Normalize coords to a dict mapping dim names to coordinate values. + + Container forms: + + - ``xarray.Coordinates`` → kept dim entries only (MultiIndex level + coords dropped). + - ``Mapping`` → returned as a shallow ``dict`` copy. + - sequence-of-entries → each entry handled per the rules below. + + Sequence-entry rules (``i`` is the position in ``coords``, ``dims[i]`` + is the matching entry in ``dims`` when one exists). An entry is + *unlabeled* if it's an unnamed ``pd.Index`` or a bare ``list`` / + ``tuple`` / ``range`` / ``ndarray``. 
+ + +---------------------------------+-----------------------+-----------+ + | Entry | Naming source | Outcome | + +=================================+=======================+===========+ + | ``pd.Index`` with ``.name`` | ``.name`` | accepted | + +---------------------------------+-----------------------+-----------+ + | unlabeled entry | ``dims[i]`` | accepted | + +---------------------------------+-----------------------+-----------+ + | unlabeled entry | — (no ``dims[i]``) | skipped | + | | | — xarray | + | | | assigns | + | | | ``dim_0`` | + | | | etc. | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` with ``.name``| ``.name`` | accepted | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` w/o ``.name`` | ``dims[i]`` | accepted | + | | | (named on | + | | | a copy) | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` w/o ``.name`` | — (no ``dims[i]``) | TypeError | + +---------------------------------+-----------------------+-----------+ + | anything else (e.g. DataArray) | — | TypeError | + +---------------------------------+-----------------------+-----------+ + """ + if isinstance(coords, Coordinates): + # Coordinates iterates over every coord variable, including + # MultiIndex level coords. Keep only the entries that are dims. + return {d: coords[d] for d in coords.dims if d in coords} + if isinstance(coords, Mapping): + return dict(coords) + dim_names: list[Any] | None = None + if dims is not None: + dim_names = list(dims) if isinstance(dims, list | tuple) else [dims] + result: dict[Hashable, Any] = {} + for i, c in enumerate(coords): + if isinstance(c, pd.MultiIndex): + name = c.name or ( + dim_names[i] if dim_names and i < len(dim_names) else None + ) + if name is None: + raise TypeError( + "MultiIndex coords entries must have .name set so " + "xarray can use it as the dimension name. 
Set it via " + "`idx.name = 'my_dim'`, or pass `dims=[...]` to name " + "entries by position." + ) + if c.name is None: + c = c.copy() + c.name = name + result[name] = c + elif isinstance(c, pd.Index): + name = ( + c.name + if c.name + else (dim_names[i] if dim_names and i < len(dim_names) else None) + ) + if name is not None: + result[name] = c + elif isinstance(c, list | tuple | range | np.ndarray): + if dim_names and i < len(dim_names): + result[dim_names[i]] = pd.Index(c, name=dim_names[i]) + else: + raise TypeError( + f"coords entries must be pd.Index or an unnamed sequence " + f"(list / tuple / range / numpy.ndarray); got " + f"{type(c).__name__}. For an xarray DataArray coord, pass " + f"`variable.indexes[]` (a pd.Index) instead." + ) + return result + + +def _as_index(coord_values: Any) -> pd.Index: + return ( + coord_values if isinstance(coord_values, pd.Index) else pd.Index(coord_values) + ) + + +def _as_multiindex(coord_values: Any) -> pd.MultiIndex | None: + """Return the backing ``pd.MultiIndex`` of a coords entry, or ``None``.""" + if isinstance(coord_values, pd.MultiIndex): + return coord_values + if isinstance(coord_values, DataArray): + idx = coord_values.to_index() + if isinstance(idx, pd.MultiIndex): + return idx + return None + + +def get_from_iterable(lst: DimsLike | None, index: int) -> Any | None: + """ + Returns the element at the specified index of the list, or None if the index + is out of bounds. + """ + if lst is None: + return None + if isinstance(lst, Sequence | Iterable): + lst = list(lst) + else: + lst = [lst] + return lst[index] if 0 <= index < len(lst) else None + + +def pandas_to_dataarray( + arr: pd.DataFrame | pd.Series, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: + """ + Convert a pandas DataFrame or Series to a DataArray. + + As pandas objects already have a concept of coordinates, the + coordinates (index, columns) will be used as coordinates for the DataArray. 
+ Solely the dimension names can be specified. + + Parameters + ---------- + arr (Union[pd.DataFrame, pd.Series]): + The input pandas DataFrame or Series. + coords (Union[dict, list, None]): + The coordinates for the DataArray. If None, default coordinates will be used. + dims (Union[list, None]): + The dimensions for the DataArray. If None, the column names of the DataFrame or the index names of the Series will be used. + **kwargs: + Additional keyword arguments to be passed to the DataArray constructor. + + Returns + ------- + DataArray: + The converted DataArray. + """ + dims = [ + axis.name or get_from_iterable(dims, i) or f"dim_{i}" + for i, axis in enumerate(arr.axes) + ] + return DataArray(arr, coords=None, dims=dims, **kwargs) + + +def numpy_to_dataarray( + arr: np.ndarray, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: + """ + Convert a numpy array to a DataArray. + + Parameters + ---------- + arr (np.ndarray): + The input numpy array. + coords (Union[dict, list, None]): + The coordinates for the DataArray. If None, default coordinates will be used. + dims (Union[list, None]): + The dimensions for the DataArray. If None, the dimensions will be automatically generated. + **kwargs: + Additional keyword arguments to be passed to the DataArray constructor. + + Returns + ------- + DataArray: + The converted DataArray. 
+ """ + # fallback case for zero dim arrays + if arr.ndim == 0: + if dims is None and is_dict_like(coords): + dims = list(coords.keys()) + return DataArray(arr.item(), coords=coords, dims=dims, **kwargs) + + if isinstance(dims, Iterable | Sequence): + dims = list(dims) + elif dims is not None: + dims = [dims] + + if dims is not None and len(dims): + dims = [get_from_iterable(dims, i) or f"dim_{i}" for i in range(arr.ndim)] + + if dims is not None and len(dims) and coords is not None: + if isinstance(coords, list): + coords = dict(zip(dims, coords[: arr.ndim])) + elif is_dict_like(coords): + coords = {k: v for k, v in coords.items() if k in dims} + + return DataArray(arr, coords=coords, dims=dims, **kwargs) + + +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: + """ + Convert a pandas Series or DataFrame with fully named axes to a DataArray. + + Returns ``None`` if any axis (or MultiIndex level) is unnamed or + non-string, so the caller can fall back to ``as_dataarray``. + """ + names = list(arr.index.names) + if isinstance(arr, pd.DataFrame): + names += list(arr.columns.names) + if any(not isinstance(n, str) for n in names): + return None + + if isinstance(arr, pd.DataFrame): + if isinstance(arr.index, pd.MultiIndex) or isinstance( + arr.columns, pd.MultiIndex + ): + arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) + return arr.to_xarray() + return DataArray(arr) + + return arr.to_xarray() + + +@overload +def fill_missing_coords(ds: DataArray, fill_helper_dims: bool = False) -> DataArray: ... + + +@overload +def fill_missing_coords(ds: Dataset, fill_helper_dims: bool = False) -> Dataset: ... + + +def fill_missing_coords( + ds: DataArray | Dataset, fill_helper_dims: bool = False +) -> Dataset | DataArray: + """ + Fill coordinates of a xarray Dataset or DataArray with integer coordinates. 
+ + This function fills in the integer coordinates for all dimensions of a + Dataset or DataArray that have no coordinates assigned yet. + + Parameters + ---------- + ds : xarray.DataArray or xarray.Dataset + fill_helper_dims : bool, optional + Whether to fill in integer coordinates for helper dimensions, by default False. + + """ + ds = ds.copy() + if not isinstance(ds, Dataset | DataArray): + raise TypeError(f"Expected xarray.DataArray or xarray.Dataset, got {type(ds)}.") + + skip_dims = [] if fill_helper_dims else HELPER_DIMS + + # Fill in missing integer coordinates + for dim in ds.dims: + if dim not in ds.coords and dim not in skip_dims: + ds.coords[dim] = arange(ds.sizes[dim]) + + return ds + + +def as_dataarray( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: + """ + Convert ``arr`` to a DataArray. + + Picks the right constructor for each supported input type (pandas, + polars, numpy, scalar, DataArray) and labels positional axes with + ``dims`` / ``coords``. The result is not reshaped against ``coords``: + dims are neither expanded, reordered, nor projected onto MultiIndex + dims. Use :func:`broadcast_to_coords` when + ``coords`` should govern the result's shape. + + Parameters + ---------- + arr + The input to convert. + coords + Coordinate values used to label positional axes. + dims + Dimension names used to label positional axes. + **kwargs + Forwarded to the underlying DataArray construction. + + Returns + ------- + DataArray + The converted input, dims and entries as ``arr`` provides them. 
+ """ + if isinstance(arr, pd.Series | pd.DataFrame): + arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) + elif isinstance(arr, np.ndarray): + arr = numpy_to_dataarray(arr, coords=coords, dims=dims, **kwargs) + elif isinstance(arr, pl.Series): + arr = numpy_to_dataarray(arr.to_numpy(), coords=coords, dims=dims, **kwargs) + elif isinstance(arr, np.number | int | float | str | bool | list): + if isinstance(arr, np.number): + arr = float(arr) + if dims is None: + if isinstance(coords, Coordinates): + dims = coords.dims + elif is_dict_like(coords) and np.ndim(arr) == 0: + dims = list(coords.keys()) + arr = DataArray(arr, coords=coords, dims=dims, **kwargs) + + elif not isinstance(arr, DataArray): + supported_types = [ + np.number, + str, + bool, + list, + pd.Series, + pd.DataFrame, + np.ndarray, + DataArray, + pl.Series, + ] + supported_types_str = ", ".join([t.__name__ for t in supported_types]) + raise TypeError( + f"Unsupported type of arr: {type(arr)}. Supported types are: {supported_types_str}" + ) + + arr = fill_missing_coords(arr) + return arr + + +class _LevelProjection(NamedTuple): + """ + Record of one MultiIndex-level projection performed by ``_broadcast_to_coords``. + + Terminology: a stacked MultiIndex dim has *levels* (its component index + names, e.g. ``period`` / ``timestep``) and *level combinations* (its + elements — one tuple per position, e.g. ``(2030, 't1')``). + """ + + dim: Hashable + levels: list[Hashable] + is_partial: bool # input carried only a subset of the MI's levels + has_gap: bool # some level combinations of the MI dim got no value (NaN) + missing: list[Any] # the level combinations that got no value + + +def _project_onto_multiindex_levels( + arr: DataArray, + expected: dict[Hashable, Any], +) -> tuple[DataArray, list[_LevelProjection]]: + """ + Map ``arr`` dims that name levels of a stacked-MultiIndex coords dim onto it. 
+ + For every level combination of the MultiIndex dim, select the ``arr`` + value at that combination's level values. A subset of levels broadcasts + across the remaining ones; the full set aligns element-wise. ``arr`` is + returned unchanged when it carries no level dims. + + Raises ``ValueError`` only on structural errors: a level name owned by + two MI dims, or a level value missing from ``arr``. Partial projections + and coverage gaps are recorded in the returned ``_LevelProjection`` list; + the caller decides how to treat them. + """ + level_owner: dict[Hashable, Hashable] = {} + owner_mi: dict[Hashable, pd.MultiIndex] = {} + for dim, coord_values in expected.items(): + mi = _as_multiindex(coord_values) + if mi is None: + continue + owner_mi[dim] = mi + for level in mi.names: + if level is None: + continue + if level in level_owner: + raise ValueError( + f"Level {level!r} is shared by MultiIndex dimensions " + f"{level_owner[level]!r} and {dim!r}; cannot resolve which " + f"to align to." + ) + level_owner[level] = dim + + groups: dict[Hashable, list[Hashable]] = {} + for d in arr.dims: + if d in expected: + continue + owner = level_owner.get(d) + if owner is not None: + groups.setdefault(owner, []).append(d) + + projections: list[_LevelProjection] = [] + for dim, levels in groups.items(): + mi = owner_mi[dim] + selectors = { + level: DataArray(np.asarray(mi.get_level_values(level)), dims=[dim]) + for level in levels + } + try: + arr = arr.sel(selectors) + except KeyError as err: + raise ValueError( + f"Cannot align level(s) {levels} onto MultiIndex dimension " + f"{dim!r}: value {err} is missing." + ) from err + arr = arr.assign_coords(Coordinates.from_pandas_multiindex(mi, dim)) + # A level combination is "missing" when the projection gave it no + # value at any position of the other dims. 
+ null_mask = arr.isnull() + other_dims = [d for d in arr.dims if d != dim] + if other_dims: + null_mask = null_mask.any(other_dims) + has_gap = bool(null_mask.any()) + missing = list(arr.indexes[dim][null_mask.values]) if has_gap else [] + projections.append( + _LevelProjection( + dim=dim, + levels=levels, + is_partial=len(levels) < sum(name is not None for name in mi.names), + has_gap=has_gap, + missing=missing, + ) + ) + + return arr, projections + + +def _warn_implicit_projections(projections: list[_LevelProjection]) -> None: + """ + Deprecation warnings for implicit MultiIndex-level projections. + + The same check in every mode (scenario B of the #732 / #737 discussion): + implicit projection is deprecated and raises under the v1 convention. The + strict path raises on coverage gaps before reaching here, so only partial + levels warn there; the non-strict path warns for both. + + TODO(#738): migrate to ``warn_legacy()`` / ``LinopySemanticsWarning`` + once the v1 semantics infrastructure (#717) lands. + """ + for p in projections: + if p.is_partial or p.has_gap: + kind = ( + f"broadcasting level subset {p.levels}" + if p.is_partial + else f"filling uncovered level combinations with NaN " + f"(from level(s) {p.levels})" + ) + warn( + f"multiindex-projection: implicitly {kind} onto MultiIndex " + f"dimension {p.dim!r}. This is deprecated and will raise under " + f"the v1 convention; project the input onto the dimension " + f"explicitly (select with the dimension's level values) to " + f"keep current behavior.", + EvolvingAPIWarning, + stacklevel=3, + ) + + +def _broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> tuple[DataArray, list[_LevelProjection]]: + """ + Convert ``arr`` and broadcast it against ``coords`` (shared mechanics). 
+ + Returns the broadcast DataArray together with the MultiIndex-level + projections performed along the way, so the public entry points can + apply their own policy (warn or raise) to partial projections and + coverage gaps. + """ + if coords is None: + return as_dataarray(arr, coords, dims, **kwargs), [] + + if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords): + # xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a + # coords entry passed as a tuple behaves identically to a list. + coords = [list(c) if isinstance(c, tuple) else c for c in coords] + + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return as_dataarray(arr, coords, dims, **kwargs), [] + + if isinstance(arr, pd.Series | pd.DataFrame): + converted = _named_pandas_to_dataarray(arr) + if converted is not None: + arr = converted + + if not isinstance(arr, DataArray): + # numpy/polars/unnamed-pandas inputs are positional — their only + # meaningful information is the values; any axis labels are + # auto-generated. Default dims to coords' keys so the conversion + # labels axes correctly (instead of dim_0/dim_1), then re-assign + # coords from expected so positional inputs align to coords by + # position. A shape mismatch surfaces here as a clear xarray + # "conflicting sizes" error rather than a confusing + # "coordinates do not match" further down. + if dims is None: + dims = list(expected) + arr = as_dataarray(arr, coords, dims=dims, **kwargs) + # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits + # a FutureWarning and isn't needed (the conversion already used it). 
+ arr = arr.assign_coords( + { + d: expected[d] + for d in arr.dims + if d in expected and not isinstance(arr.indexes.get(d), pd.MultiIndex) + } + ) + + arr, projections = _project_onto_multiindex_levels(arr, expected) + + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + if isinstance(arr.indexes.get(dim), pd.MultiIndex): + continue + expected_idx = _as_index(coord_values) + actual_idx = arr.coords[dim].to_index() + if actual_idx.equals(expected_idx): + continue + # Same values, different order → reindex to match expected order. + # Different value sets are left alone for downstream xarray alignment. + if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( + expected_idx + ): + arr = arr.reindex({dim: expected_idx}) + + # expand_dims prepends new dimensions and their coordinate variables; + # the subsequent transpose restores coords order. Both are no-ops when + # the array already matches. Reconstruct so the DataArray's coords + # iteration order also follows coords (a Dataset built from this picks + # up its dim order from coord insertion). + expand = {k: v for k, v in expected.items() if k not in arr.dims} + if expand: + # expand_dims drops the level coords of a MultiIndex-backed dim, + # leaving a degenerate flat index that fails to align downstream. + # Broadcast against a proper Coordinates template instead. + plain = {} + for dim, coord_values in expand.items(): + mi = _as_multiindex(coord_values) + # Fall back to expand_dims when arr already carries one of the + # MultiIndex's level names as its own coord: broadcasting against + # the level coords would raise on the conflicting index. 
+ if mi is None or set(mi.names) & (set(arr.coords) | set(arr.dims)): + plain[dim] = coord_values + continue + template = DataArray( + np.zeros(len(mi)), + coords=Coordinates.from_pandas_multiindex(mi, dim), + dims=[dim], + ) + arr, _ = broadcast(arr, template) + if plain: + arr = arr.expand_dims(plain) + + target_dims = tuple(d for d in expected if d in arr.dims) + tuple( + d for d in arr.dims if d not in expected + ) + arr = arr.transpose(*target_dims) + + coord_order = [c for c in target_dims if c in arr.coords] + [ + c for c in arr.coords if c not in target_dims + ] + if list(arr.coords) != coord_order: + arr = DataArray( + arr.variable, + coords={c: arr.coords[c] for c in coord_order}, + name=arr.name, + ) + + return arr, projections + + +@overload +def broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = ..., + dims: DimsLike | None = ..., + *, + strict: Literal[True] = ..., + label: str, + **kwargs: Any, +) -> DataArray: ... + + +@overload +def broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = ..., + dims: DimsLike | None = ..., + *, + strict: Literal[False], + label: None = ..., + **kwargs: Any, +) -> DataArray: ... + + +def broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + *, + strict: bool = True, + label: str | None = None, + **kwargs: Any, +) -> DataArray: + """ + Convert ``arr`` to a DataArray and broadcast it against ``coords``. + + When ``coords`` carries named dimensions, the result is aligned with + them: positional inputs are labeled by position, shared dims with equal + values in a different order are reindexed, dims missing from ``arr`` + are expanded, dims naming levels of a stacked-MultiIndex coords dim are + projected onto it, and the result is transposed to ``coords`` order. 
+ + ``strict`` decides what happens to anything broadcasting alone cannot + resolve — extra dims, disagreeing coord values, and MultiIndex coverage + gaps: + + - ``strict=True`` (default): raise, naming ``label`` in the error. + - ``strict=False``: pass through unchanged so downstream xarray + alignment can handle them. + + A stacked-MultiIndex dim of ``coords`` has *levels* (its component + index names, e.g. ``period`` / ``timestep``) and *level combinations* + (its elements — one tuple per position, e.g. ``(2030, 't1')``). Inputs + indexed by levels instead of the dim itself are implicitly projected + onto the dim's level combinations. These projections are deprecated in + both modes and emit an :class:`~linopy.EvolvingAPIWarning`; the v1 + convention will require them to be explicit. Two cases: + + - input misses a whole level → broadcasts across it; warns in both modes. + - input gives some level combinations no value (a *coverage gap*) → + warns under ``strict=False``, raises under ``strict=True`` (the error + lists the missing combinations). + + Parameters + ---------- + arr + The input to convert and broadcast. + coords + Coordinate values the result is broadcast against. ``None`` falls + back to plain conversion. + dims + Dimension names used to label positional axes. + strict + Check that the result stays within ``coords`` (raise on violation) + instead of passing violations through. + label + Name of the input in error messages (e.g. ``"lower bound"``). + Required when ``strict=True``, not accepted otherwise. + **kwargs + Forwarded to the underlying DataArray construction. + + Returns + ------- + DataArray + Broadcast against ``coords``. + """ + if not strict: + da, projections = _broadcast_to_coords(arr, coords, dims, **kwargs) + _warn_implicit_projections(projections) + return da + + if label is None: + raise TypeError( + "broadcast_to_coords(strict=True) requires `label` to name the " + "input in error messages, e.g. label='lower bound'." 
+ ) + subject = label + if coords is not None: + _coords_to_dict(coords, dims=dims) + try: + da, projections = _broadcast_to_coords(arr, coords, dims=dims, **kwargs) + except TypeError as err: + raise TypeError(f"{subject} could not be aligned to coords: {err}") from err + except (ValueError, CoordinateValidationError) as err: + raise ValueError(f"{subject} could not be aligned to coords: {err}") from err + for p in projections: + if p.has_gap: + preview = ", ".join(str(c) for c in p.missing[:5]) + if len(p.missing) > 5: + preview += f", … ({len(p.missing)} in total)" + raise ValueError( + f"{subject} could not be aligned to coords: no value for " + f"{len(p.missing)} level combination(s) of MultiIndex dimension " + f"{p.dim!r}: {preview}. The input is indexed by level(s) " + f"{p.levels} and must cover every combination." + ) + _warn_implicit_projections(projections) + validate_alignment(da, coords, dims=dims, label=label) + return da + + +def validate_alignment( + arr: DataArray, + coords: CoordsLike | None, + dims: DimsLike | None = None, + *, + label: str | None = None, +) -> None: + """ + Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. + + ``arr`` is compatible with ``coords`` when both of the following hold: + + - every dim in ``arr.dims`` is also a dim in ``coords`` (no extras); + - for every dim shared between ``arr`` and ``coords``, the coord + values are equal. + + ``dims`` mirrors the ``dims`` argument of ``as_dataarray``: it names + unnamed entries in a sequence-form ``coords`` by position, so + ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as + ``coords={"x": [1, 2, 3]}``. + + ``label`` names the argument in error messages (e.g. ``"lower bound"``). + + No-op when ``coords`` is ``None`` or carries no named dimensions. 
+ """ + if coords is None: + return + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return + subject = label or "Value" + expected_dims = set(expected) + extra = set(arr.dims) - expected_dims + if extra: + raise ValueError( + f"{subject} has dimension(s) {sorted(extra, key=str)} not declared in coords " + f"({sorted(expected_dims, key=str)}). Add them to coords or remove them from " + f"{subject.lower()}." + ) + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + expected_mi = _as_multiindex(coord_values) + actual_mi = _as_multiindex(arr.indexes.get(dim)) + if expected_mi is not None or actual_mi is not None: + if ( + expected_mi is None + or actual_mi is None + or not actual_mi.equals(expected_mi) + ): + raise ValueError( + f"{subject}: MultiIndex for dimension {dim!r} does not " + f"match coords." + ) + continue + expected_idx = _as_index(coord_values) + actual_idx = arr.coords[dim].to_index() + if not actual_idx.equals(expected_idx): + raise ValueError( + f"{subject}: coordinate values for dimension {dim!r} do not match " + f"coords — expected {expected_idx.tolist()}, got " + f"{actual_idx.tolist()}." + ) + + +def align( + *objects: LinearExpression | QuadraticExpression | Variable | T_Alignable, + join: JoinOptions = "inner", + copy: bool = True, + indexes: Any = None, + exclude: str | Iterable[Hashable] = frozenset(), + fill_value: Any = dtypes.NA, +) -> tuple[LinearExpression | QuadraticExpression | Variable | T_Alignable, ...]: + """ + Given any number of Variables, Expressions, Dataset and/or DataArray objects, + returns new objects with aligned indexes and dimension sizes. + + Array from the aligned objects are suitable as input to mathematical + operators, because along each dimension they have the same index and size. + + Missing values (if ``join != 'inner'``) are filled with ``fill_value``. + The default fill value is NaN. 
+ + This function essentially wraps the xarray function + :py:func:`xarray.align`. + + Parameters + ---------- + *objects : Variable, LinearExpression, QuadraticExpression, Dataset or DataArray + Objects to align. + join : {"outer", "inner", "left", "right", "exact", "override"}, optional + Method for joining the indexes of the passed objects along each + dimension: + + - "outer": use the union of object indexes + - "inner": use the intersection of object indexes + - "left": use indexes from the first object with each dimension + - "right": use indexes from the last object with each dimension + - "exact": instead of aligning, raise `ValueError` when indexes to be + aligned are not equal + - "override": if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. + + copy : bool, default: True + If ``copy=True``, data in the return values is always copied. If + ``copy=False`` and reindexing is unnecessary, or can be performed with + only slice operations, then the output may share memory with the input. + In either case, new xarray objects are always returned. + indexes : dict-like, optional + Any indexes explicitly provided with the `indexes` argument should be + used in preference to the aligned indexes. + exclude : str, iterable of hashable or None, optional + Dimensions that must be excluded from alignment. + fill_value : scalar or dict-like, optional + Value to use for newly missing values. If a dict-like, maps + variable names to fill values. Use a data array's name to + refer to its values. + + Returns + ------- + aligned : tuple of DataArray or Dataset + Tuple of objects with the same type as `*objects` with aligned + coordinates. 
+ + + """ + from linopy.expressions import LinearExpression, QuadraticExpression + from linopy.variables import Variable + + finisher: list[partial[Any] | Callable[[Any], Any]] = [] + das: list[Any] = [] + for obj in objects: + if isinstance(obj, LinearExpression | QuadraticExpression): + finisher.append(partial(obj.__class__, model=obj.model)) + das.append(obj.data) + elif isinstance(obj, Variable): + finisher.append( + partial( + obj.__class__, + model=obj.model, + name=obj.data.attrs["name"], + skip_broadcast=True, + ) + ) + das.append(obj.data) + else: + finisher.append(lambda x: x) + das.append(obj) + + exclude = frozenset(exclude).union(HELPER_DIMS) + aligned = xr_align( + *das, + join=join, + copy=copy, + indexes=indexes, + exclude=exclude, + fill_value=fill_value, + ) + return tuple([f(da) for f, da in zip(finisher, aligned)]) diff --git a/linopy/common.py b/linopy/common.py index 4147cde6..5c4a30df 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -9,37 +9,31 @@ import operator import os -from collections.abc import Callable, Generator, Hashable, Iterable, Mapping, Sequence -from functools import cached_property, partial, reduce, wraps +from collections.abc import Callable, Generator, Hashable, Iterable, Sequence +from functools import cached_property, reduce, wraps from pathlib import Path -from typing import TYPE_CHECKING, Any, Generic, Literal, NamedTuple, TypeVar, overload +from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload from warnings import warn import numpy as np import pandas as pd import polars as pl -from numpy import arange, nan, signedinteger +from numpy import nan, signedinteger from polars.datatypes import DataTypeClass -from xarray import Coordinates, DataArray, Dataset, apply_ufunc, broadcast +from xarray import DataArray, Dataset, apply_ufunc, broadcast from xarray import align as xr_align -from xarray.core import dtypes, indexing -from xarray.core.coordinates import CoordinateValidationError -from xarray.core.types 
import JoinOptions, T_Alignable +from xarray.core import indexing from xarray.namedarray.utils import is_dict_like from linopy.config import options from linopy.constants import ( - HELPER_DIMS, SIGNS, - EvolvingAPIWarning, SIGNS_alternative, SIGNS_pretty, sign_replace_dict, ) from linopy.types import ( CONSTANT_TYPES, - CoordsLike, - DimsLike, SideLike, ) @@ -118,733 +112,6 @@ def format_string_as_variable_name(name: Hashable) -> str: return str(name).replace(" ", "_").replace("-", "_") -def get_from_iterable(lst: DimsLike | None, index: int) -> Any | None: - """ - Returns the element at the specified index of the list, or None if the index - is out of bounds. - """ - if lst is None: - return None - if isinstance(lst, Sequence | Iterable): - lst = list(lst) - else: - lst = [lst] - return lst[index] if 0 <= index < len(lst) else None - - -def pandas_to_dataarray( - arr: pd.DataFrame | pd.Series, - coords: CoordsLike | None = None, - dims: DimsLike | None = None, - **kwargs: Any, -) -> DataArray: - """ - Convert a pandas DataFrame or Series to a DataArray. - - As pandas objects already have a concept of coordinates, the - coordinates (index, columns) will be used as coordinates for the DataArray. - Solely the dimension names can be specified. - - Parameters - ---------- - arr (Union[pd.DataFrame, pd.Series]): - The input pandas DataFrame or Series. - coords (Union[dict, list, None]): - The coordinates for the DataArray. If None, default coordinates will be used. - dims (Union[list, None]): - The dimensions for the DataArray. If None, the column names of the DataFrame or the index names of the Series will be used. - **kwargs: - Additional keyword arguments to be passed to the DataArray constructor. - - Returns - ------- - DataArray: - The converted DataArray. 
- """ - dims = [ - axis.name or get_from_iterable(dims, i) or f"dim_{i}" - for i, axis in enumerate(arr.axes) - ] - return DataArray(arr, coords=None, dims=dims, **kwargs) - - -def numpy_to_dataarray( - arr: np.ndarray, - coords: CoordsLike | None = None, - dims: DimsLike | None = None, - **kwargs: Any, -) -> DataArray: - """ - Convert a numpy array to a DataArray. - - Parameters - ---------- - arr (np.ndarray): - The input numpy array. - coords (Union[dict, list, None]): - The coordinates for the DataArray. If None, default coordinates will be used. - dims (Union[list, None]): - The dimensions for the DataArray. If None, the dimensions will be automatically generated. - **kwargs: - Additional keyword arguments to be passed to the DataArray constructor. - - Returns - ------- - DataArray: - The converted DataArray. - """ - # fallback case for zero dim arrays - if arr.ndim == 0: - if dims is None and is_dict_like(coords): - dims = list(coords.keys()) - return DataArray(arr.item(), coords=coords, dims=dims, **kwargs) - - if isinstance(dims, Iterable | Sequence): - dims = list(dims) - elif dims is not None: - dims = [dims] - - if dims is not None and len(dims): - dims = [get_from_iterable(dims, i) or f"dim_{i}" for i in range(arr.ndim)] - - if dims is not None and len(dims) and coords is not None: - if isinstance(coords, list): - coords = dict(zip(dims, coords[: arr.ndim])) - elif is_dict_like(coords): - coords = {k: v for k, v in coords.items() if k in dims} - - return DataArray(arr, coords=coords, dims=dims, **kwargs) - - -def as_dataarray( - arr: Any, - coords: CoordsLike | None = None, - dims: DimsLike | None = None, - **kwargs: Any, -) -> DataArray: - """ - Convert ``arr`` to a DataArray. - - Picks the right constructor for each supported input type (pandas, - polars, numpy, scalar, DataArray) and labels positional axes with - ``dims`` / ``coords``. 
The result is not reshaped against ``coords``: - dims are neither expanded, reordered, nor projected onto MultiIndex - dims. Use :func:`broadcast_to_coords` when - ``coords`` should govern the result's shape. - - Parameters - ---------- - arr - The input to convert. - coords - Coordinate values used to label positional axes. - dims - Dimension names used to label positional axes. - **kwargs - Forwarded to the underlying DataArray construction. - - Returns - ------- - DataArray - The converted input, dims and entries as ``arr`` provides them. - """ - if isinstance(arr, pd.Series | pd.DataFrame): - arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) - elif isinstance(arr, np.ndarray): - arr = numpy_to_dataarray(arr, coords=coords, dims=dims, **kwargs) - elif isinstance(arr, pl.Series): - arr = numpy_to_dataarray(arr.to_numpy(), coords=coords, dims=dims, **kwargs) - elif isinstance(arr, np.number | int | float | str | bool | list): - if isinstance(arr, np.number): - arr = float(arr) - if dims is None: - if isinstance(coords, Coordinates): - dims = coords.dims - elif is_dict_like(coords) and np.ndim(arr) == 0: - dims = list(coords.keys()) - arr = DataArray(arr, coords=coords, dims=dims, **kwargs) - - elif not isinstance(arr, DataArray): - supported_types = [ - np.number, - str, - bool, - list, - pd.Series, - pd.DataFrame, - np.ndarray, - DataArray, - pl.Series, - ] - supported_types_str = ", ".join([t.__name__ for t in supported_types]) - raise TypeError( - f"Unsupported type of arr: {type(arr)}. 
Supported types are: {supported_types_str}" - ) - - arr = fill_missing_coords(arr) - return arr - - -def _as_index(coord_values: Any) -> pd.Index: - return ( - coord_values if isinstance(coord_values, pd.Index) else pd.Index(coord_values) - ) - - -def _as_multiindex(coord_values: Any) -> pd.MultiIndex | None: - """Return the backing ``pd.MultiIndex`` of a coords entry, or ``None``.""" - if isinstance(coord_values, pd.MultiIndex): - return coord_values - if isinstance(coord_values, DataArray): - idx = coord_values.to_index() - if isinstance(idx, pd.MultiIndex): - return idx - return None - - -class _LevelProjection(NamedTuple): - """ - Record of one MultiIndex-level projection performed by ``_broadcast_to_coords``. - - Terminology: a stacked MultiIndex dim has *levels* (its component index - names, e.g. ``period`` / ``timestep``) and *level combinations* (its - elements — one tuple per position, e.g. ``(2030, 't1')``). - """ - - dim: Hashable - levels: list[Hashable] - is_partial: bool # input carried only a subset of the MI's levels - has_gap: bool # some level combinations of the MI dim got no value (NaN) - missing: list[Any] # the level combinations that got no value - - -def _project_onto_multiindex_levels( - arr: DataArray, - expected: dict[Hashable, Any], -) -> tuple[DataArray, list[_LevelProjection]]: - """ - Map ``arr`` dims that name levels of a stacked-MultiIndex coords dim onto it. - - For every level combination of the MultiIndex dim, select the ``arr`` - value at that combination's level values. A subset of levels broadcasts - across the remaining ones; the full set aligns element-wise. ``arr`` is - returned unchanged when it carries no level dims. - - Raises ``ValueError`` only on structural errors: a level name owned by - two MI dims, or a level value missing from ``arr``. Partial projections - and coverage gaps are recorded in the returned ``_LevelProjection`` list; - the caller decides how to treat them. 
- """ - level_owner: dict[Hashable, Hashable] = {} - owner_mi: dict[Hashable, pd.MultiIndex] = {} - for dim, coord_values in expected.items(): - mi = _as_multiindex(coord_values) - if mi is None: - continue - owner_mi[dim] = mi - for level in mi.names: - if level is None: - continue - if level in level_owner: - raise ValueError( - f"Level {level!r} is shared by MultiIndex dimensions " - f"{level_owner[level]!r} and {dim!r}; cannot resolve which " - f"to align to." - ) - level_owner[level] = dim - - groups: dict[Hashable, list[Hashable]] = {} - for d in arr.dims: - if d in expected: - continue - owner = level_owner.get(d) - if owner is not None: - groups.setdefault(owner, []).append(d) - - projections: list[_LevelProjection] = [] - for dim, levels in groups.items(): - mi = owner_mi[dim] - selectors = { - level: DataArray(np.asarray(mi.get_level_values(level)), dims=[dim]) - for level in levels - } - try: - arr = arr.sel(selectors) - except KeyError as err: - raise ValueError( - f"Cannot align level(s) {levels} onto MultiIndex dimension " - f"{dim!r}: value {err} is missing." - ) from err - arr = arr.assign_coords(Coordinates.from_pandas_multiindex(mi, dim)) - # A level combination is "missing" when the projection gave it no - # value at any position of the other dims. - null_mask = arr.isnull() - other_dims = [d for d in arr.dims if d != dim] - if other_dims: - null_mask = null_mask.any(other_dims) - has_gap = bool(null_mask.any()) - missing = list(arr.indexes[dim][null_mask.values]) if has_gap else [] - projections.append( - _LevelProjection( - dim=dim, - levels=levels, - is_partial=len(levels) < sum(name is not None for name in mi.names), - has_gap=has_gap, - missing=missing, - ) - ) - - return arr, projections - - -def _broadcast_to_coords( - arr: Any, - coords: CoordsLike | None = None, - dims: DimsLike | None = None, - **kwargs: Any, -) -> tuple[DataArray, list[_LevelProjection]]: - """ - Convert ``arr`` and broadcast it against ``coords`` (shared mechanics). 
- - Returns the broadcast DataArray together with the MultiIndex-level - projections performed along the way, so the public entry points can - apply their own policy (warn or raise) to partial projections and - coverage gaps. - """ - if coords is None: - return as_dataarray(arr, coords, dims, **kwargs), [] - - if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords): - # xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a - # coords entry passed as a tuple behaves identically to a list. - coords = [list(c) if isinstance(c, tuple) else c for c in coords] - - expected = _coords_to_dict(coords, dims=dims) - if not expected: - return as_dataarray(arr, coords, dims, **kwargs), [] - - if isinstance(arr, pd.Series | pd.DataFrame): - converted = _named_pandas_to_dataarray(arr) - if converted is not None: - arr = converted - - if not isinstance(arr, DataArray): - # numpy/polars/unnamed-pandas inputs are positional — their only - # meaningful information is the values; any axis labels are - # auto-generated. Default dims to coords' keys so the conversion - # labels axes correctly (instead of dim_0/dim_1), then re-assign - # coords from expected so positional inputs align to coords by - # position. A shape mismatch surfaces here as a clear xarray - # "conflicting sizes" error rather than a confusing - # "coordinates do not match" further down. - if dims is None: - dims = list(expected) - arr = as_dataarray(arr, coords, dims=dims, **kwargs) - # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits - # a FutureWarning and isn't needed (the conversion already used it). 
- arr = arr.assign_coords( - { - d: expected[d] - for d in arr.dims - if d in expected and not isinstance(arr.indexes.get(d), pd.MultiIndex) - } - ) - - arr, projections = _project_onto_multiindex_levels(arr, expected) - - for dim, coord_values in expected.items(): - if dim not in arr.dims: - continue - if isinstance(arr.indexes.get(dim), pd.MultiIndex): - continue - expected_idx = _as_index(coord_values) - actual_idx = arr.coords[dim].to_index() - if actual_idx.equals(expected_idx): - continue - # Same values, different order → reindex to match expected order. - # Different value sets are left alone for downstream xarray alignment. - if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( - expected_idx - ): - arr = arr.reindex({dim: expected_idx}) - - # expand_dims prepends new dimensions and their coordinate variables; - # the subsequent transpose restores coords order. Both are no-ops when - # the array already matches. Reconstruct so the DataArray's coords - # iteration order also follows coords (a Dataset built from this picks - # up its dim order from coord insertion). - expand = {k: v for k, v in expected.items() if k not in arr.dims} - if expand: - # expand_dims drops the level coords of a MultiIndex-backed dim, - # leaving a degenerate flat index that fails to align downstream. - # Broadcast against a proper Coordinates template instead. - plain = {} - for dim, coord_values in expand.items(): - mi = _as_multiindex(coord_values) - # Fall back to expand_dims when arr already carries one of the - # MultiIndex's level names as its own coord: broadcasting against - # the level coords would raise on the conflicting index. 
- if mi is None or set(mi.names) & (set(arr.coords) | set(arr.dims)): - plain[dim] = coord_values - continue - template = DataArray( - np.zeros(len(mi)), - coords=Coordinates.from_pandas_multiindex(mi, dim), - dims=[dim], - ) - arr, _ = broadcast(arr, template) - if plain: - arr = arr.expand_dims(plain) - - target_dims = tuple(d for d in expected if d in arr.dims) + tuple( - d for d in arr.dims if d not in expected - ) - arr = arr.transpose(*target_dims) - - coord_order = [c for c in target_dims if c in arr.coords] + [ - c for c in arr.coords if c not in target_dims - ] - if list(arr.coords) != coord_order: - arr = DataArray( - arr.variable, - coords={c: arr.coords[c] for c in coord_order}, - name=arr.name, - ) - - return arr, projections - - -@overload -def broadcast_to_coords( - arr: Any, - coords: CoordsLike | None = ..., - dims: DimsLike | None = ..., - *, - strict: Literal[True] = ..., - label: str, - **kwargs: Any, -) -> DataArray: ... - - -@overload -def broadcast_to_coords( - arr: Any, - coords: CoordsLike | None = ..., - dims: DimsLike | None = ..., - *, - strict: Literal[False], - label: None = ..., - **kwargs: Any, -) -> DataArray: ... - - -def broadcast_to_coords( - arr: Any, - coords: CoordsLike | None = None, - dims: DimsLike | None = None, - *, - strict: bool = True, - label: str | None = None, - **kwargs: Any, -) -> DataArray: - """ - Convert ``arr`` to a DataArray and broadcast it against ``coords``. - - When ``coords`` carries named dimensions, the result is aligned with - them: positional inputs are labeled by position, shared dims with equal - values in a different order are reindexed, dims missing from ``arr`` - are expanded, dims naming levels of a stacked-MultiIndex coords dim are - projected onto it, and the result is transposed to ``coords`` order. 
- - ``strict`` decides what happens to anything broadcasting alone cannot - resolve — extra dims, disagreeing coord values, and MultiIndex coverage - gaps: - - - ``strict=True`` (default): raise, naming ``label`` in the error. - - ``strict=False``: pass through unchanged so downstream xarray - alignment can handle them. - - A stacked-MultiIndex dim of ``coords`` has *levels* (its component - index names, e.g. ``period`` / ``timestep``) and *level combinations* - (its elements — one tuple per position, e.g. ``(2030, 't1')``). Inputs - indexed by levels instead of the dim itself are implicitly projected - onto the dim's level combinations. These projections are deprecated in - both modes and emit an :class:`~linopy.EvolvingAPIWarning`; the v1 - convention will require them to be explicit. Two cases: - - - input misses a whole level → broadcasts across it; warns in both modes. - - input gives some level combinations no value (a *coverage gap*) → - warns under ``strict=False``, raises under ``strict=True`` (the error - lists the missing combinations). - - Parameters - ---------- - arr - The input to convert and broadcast. - coords - Coordinate values the result is broadcast against. ``None`` falls - back to plain conversion. - dims - Dimension names used to label positional axes. - strict - Check that the result stays within ``coords`` (raise on violation) - instead of passing violations through. - label - Name of the input in error messages (e.g. ``"lower bound"``). - Required when ``strict=True``, not accepted otherwise. - **kwargs - Forwarded to the underlying DataArray construction. - - Returns - ------- - DataArray - Broadcast against ``coords``. - """ - if not strict: - da, projections = _broadcast_to_coords(arr, coords, dims, **kwargs) - _warn_implicit_projections(projections) - return da - - if label is None: - raise TypeError( - "broadcast_to_coords(strict=True) requires `label` to name the " - "input in error messages, e.g. label='lower bound'." 
- ) - subject = label - if coords is not None: - _coords_to_dict(coords, dims=dims) - try: - da, projections = _broadcast_to_coords(arr, coords, dims=dims, **kwargs) - except TypeError as err: - raise TypeError(f"{subject} could not be aligned to coords: {err}") from err - except (ValueError, CoordinateValidationError) as err: - raise ValueError(f"{subject} could not be aligned to coords: {err}") from err - for p in projections: - if p.has_gap: - preview = ", ".join(str(c) for c in p.missing[:5]) - if len(p.missing) > 5: - preview += f", … ({len(p.missing)} in total)" - raise ValueError( - f"{subject} could not be aligned to coords: no value for " - f"{len(p.missing)} level combination(s) of MultiIndex dimension " - f"{p.dim!r}: {preview}. The input is indexed by level(s) " - f"{p.levels} and must cover every combination." - ) - _warn_implicit_projections(projections) - validate_alignment(da, coords, dims=dims, label=label) - return da - - -def _warn_implicit_projections(projections: list[_LevelProjection]) -> None: - """ - Deprecation warnings for implicit MultiIndex-level projections. - - The same check in every mode (scenario B of the #732 / #737 discussion): - implicit projection is deprecated and raises under the v1 convention. The - strict path raises on coverage gaps before reaching here, so only partial - levels warn there; the non-strict path warns for both. - - TODO(#738): migrate to ``warn_legacy()`` / ``LinopySemanticsWarning`` - once the v1 semantics infrastructure (#717) lands. - """ - for p in projections: - if p.is_partial or p.has_gap: - kind = ( - f"broadcasting level subset {p.levels}" - if p.is_partial - else f"filling uncovered level combinations with NaN " - f"(from level(s) {p.levels})" - ) - warn( - f"multiindex-projection: implicitly {kind} onto MultiIndex " - f"dimension {p.dim!r}. 
This is deprecated and will raise under " - f"the v1 convention; project the input onto the dimension " - f"explicitly (select with the dimension's level values) to " - f"keep current behavior.", - EvolvingAPIWarning, - stacklevel=3, - ) - - -def validate_alignment( - arr: DataArray, - coords: CoordsLike | None, - dims: DimsLike | None = None, - *, - label: str | None = None, -) -> None: - """ - Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. - - ``arr`` is compatible with ``coords`` when both of the following hold: - - - every dim in ``arr.dims`` is also a dim in ``coords`` (no extras); - - for every dim shared between ``arr`` and ``coords``, the coord - values are equal. - - ``dims`` mirrors the ``dims`` argument of ``as_dataarray``: it names - unnamed entries in a sequence-form ``coords`` by position, so - ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as - ``coords={"x": [1, 2, 3]}``. - - ``label`` names the argument in error messages (e.g. ``"lower bound"``). - - No-op when ``coords`` is ``None`` or carries no named dimensions. - """ - if coords is None: - return - expected = _coords_to_dict(coords, dims=dims) - if not expected: - return - subject = label or "Value" - expected_dims = set(expected) - extra = set(arr.dims) - expected_dims - if extra: - raise ValueError( - f"{subject} has dimension(s) {sorted(extra, key=str)} not declared in coords " - f"({sorted(expected_dims, key=str)}). Add them to coords or remove them from " - f"{subject.lower()}." - ) - for dim, coord_values in expected.items(): - if dim not in arr.dims: - continue - expected_mi = _as_multiindex(coord_values) - actual_mi = _as_multiindex(arr.indexes.get(dim)) - if expected_mi is not None or actual_mi is not None: - if ( - expected_mi is None - or actual_mi is None - or not actual_mi.equals(expected_mi) - ): - raise ValueError( - f"{subject}: MultiIndex for dimension {dim!r} does not " - f"match coords." 
- ) - continue - expected_idx = _as_index(coord_values) - actual_idx = arr.coords[dim].to_index() - if not actual_idx.equals(expected_idx): - raise ValueError( - f"{subject}: coordinate values for dimension {dim!r} do not match " - f"coords — expected {expected_idx.tolist()}, got " - f"{actual_idx.tolist()}." - ) - - -def _coords_to_dict( - coords: Sequence[Sequence | pd.Index] | Mapping, - dims: DimsLike | None = None, -) -> dict[Hashable, Any]: - """ - Normalize coords to a dict mapping dim names to coordinate values. - - Container forms: - - - ``xarray.Coordinates`` → kept dim entries only (MultiIndex level - coords dropped). - - ``Mapping`` → returned as a shallow ``dict`` copy. - - sequence-of-entries → each entry handled per the rules below. - - Sequence-entry rules (``i`` is the position in ``coords``, ``dims[i]`` - is the matching entry in ``dims`` when one exists). An entry is - *unlabeled* if it's an unnamed ``pd.Index`` or a bare ``list`` / - ``tuple`` / ``range`` / ``ndarray``. - - +---------------------------------+-----------------------+-----------+ - | Entry | Naming source | Outcome | - +=================================+=======================+===========+ - | ``pd.Index`` with ``.name`` | ``.name`` | accepted | - +---------------------------------+-----------------------+-----------+ - | unlabeled entry | ``dims[i]`` | accepted | - +---------------------------------+-----------------------+-----------+ - | unlabeled entry | — (no ``dims[i]``) | skipped | - | | | — xarray | - | | | assigns | - | | | ``dim_0`` | - | | | etc. 
| - +---------------------------------+-----------------------+-----------+ - | ``pd.MultiIndex`` with ``.name``| ``.name`` | accepted | - +---------------------------------+-----------------------+-----------+ - | ``pd.MultiIndex`` w/o ``.name`` | ``dims[i]`` | accepted | - | | | (named on | - | | | a copy) | - +---------------------------------+-----------------------+-----------+ - | ``pd.MultiIndex`` w/o ``.name`` | — (no ``dims[i]``) | TypeError | - +---------------------------------+-----------------------+-----------+ - | anything else (e.g. DataArray) | — | TypeError | - +---------------------------------+-----------------------+-----------+ - """ - if isinstance(coords, Coordinates): - # Coordinates iterates over every coord variable, including - # MultiIndex level coords. Keep only the entries that are dims. - return {d: coords[d] for d in coords.dims if d in coords} - if isinstance(coords, Mapping): - return dict(coords) - dim_names: list[Any] | None = None - if dims is not None: - dim_names = list(dims) if isinstance(dims, list | tuple) else [dims] - result: dict[Hashable, Any] = {} - for i, c in enumerate(coords): - if isinstance(c, pd.MultiIndex): - name = c.name or ( - dim_names[i] if dim_names and i < len(dim_names) else None - ) - if name is None: - raise TypeError( - "MultiIndex coords entries must have .name set so " - "xarray can use it as the dimension name. Set it via " - "`idx.name = 'my_dim'`, or pass `dims=[...]` to name " - "entries by position." 
- ) - if c.name is None: - c = c.copy() - c.name = name - result[name] = c - elif isinstance(c, pd.Index): - name = ( - c.name - if c.name - else (dim_names[i] if dim_names and i < len(dim_names) else None) - ) - if name is not None: - result[name] = c - elif isinstance(c, list | tuple | range | np.ndarray): - if dim_names and i < len(dim_names): - result[dim_names[i]] = pd.Index(c, name=dim_names[i]) - else: - raise TypeError( - f"coords entries must be pd.Index or an unnamed sequence " - f"(list / tuple / range / numpy.ndarray); got " - f"{type(c).__name__}. For an xarray DataArray coord, pass " - f"`variable.indexes[]` (a pd.Index) instead." - ) - return result - - -def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: - """ - Convert a pandas Series or DataFrame with fully named axes to a DataArray. - - Returns ``None`` if any axis (or MultiIndex level) is unnamed or - non-string, so the caller can fall back to ``as_dataarray``. - """ - names = list(arr.index.names) - if isinstance(arr, pd.DataFrame): - names += list(arr.columns.names) - if any(not isinstance(n, str) for n in names): - return None - - if isinstance(arr, pd.DataFrame): - if isinstance(arr.index, pd.MultiIndex) or isinstance( - arr.columns, pd.MultiIndex - ): - arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) - return arr.to_xarray() - return DataArray(arr) - - return arr.to_xarray() - - # TODO: rename to to_pandas_dataframe def to_dataframe( ds: Dataset, @@ -1069,44 +336,6 @@ def assign_multiindex_safe(ds: Dataset, **fields: Any) -> Dataset: return Dataset({**ds[remainders], **fields}, attrs=ds.attrs) -@overload -def fill_missing_coords(ds: DataArray, fill_helper_dims: bool = False) -> DataArray: ... - - -@overload -def fill_missing_coords(ds: Dataset, fill_helper_dims: bool = False) -> Dataset: ... 
- - -def fill_missing_coords( - ds: DataArray | Dataset, fill_helper_dims: bool = False -) -> Dataset | DataArray: - """ - Fill coordinates of a xarray Dataset or DataArray with integer coordinates. - - This function fills in the integer coordinates for all dimensions of a - Dataset or DataArray that have no coordinates assigned yet. - - Parameters - ---------- - ds : xarray.DataArray or xarray.Dataset - fill_helper_dims : bool, optional - Whether to fill in integer coordinates for helper dimensions, by default False. - - """ - ds = ds.copy() - if not isinstance(ds, Dataset | DataArray): - raise TypeError(f"Expected xarray.DataArray or xarray.Dataset, got {type(ds)}.") - - skip_dims = [] if fill_helper_dims else HELPER_DIMS - - # Fill in missing integer coordinates - for dim in ds.dims: - if dim not in ds.coords and dim not in skip_dims: - ds.coords[dim] = arange(ds.sizes[dim]) - - return ds - - T = TypeVar("T", Dataset, "Variable", "LinearExpression", "ConstraintBase") @@ -1844,103 +1073,6 @@ def check_common_keys_values(list_of_dicts: list[dict[str, Any]]) -> bool: return all(len({d[k] for d in list_of_dicts if k in d}) == 1 for k in common_keys) -def align( - *objects: LinearExpression | QuadraticExpression | Variable | T_Alignable, - join: JoinOptions = "inner", - copy: bool = True, - indexes: Any = None, - exclude: str | Iterable[Hashable] = frozenset(), - fill_value: Any = dtypes.NA, -) -> tuple[LinearExpression | QuadraticExpression | Variable | T_Alignable, ...]: - """ - Given any number of Variables, Expressions, Dataset and/or DataArray objects, - returns new objects with aligned indexes and dimension sizes. - - Array from the aligned objects are suitable as input to mathematical - operators, because along each dimension they have the same index and size. - - Missing values (if ``join != 'inner'``) are filled with ``fill_value``. - The default fill value is NaN. - - This functions essentially wraps the xarray function - :py:func:`xarray.align`. 
- - Parameters - ---------- - *objects : Variable, LinearExpression, Dataset or DataArray - Objects to align. - join : {"outer", "inner", "left", "right", "exact", "override"}, optional - Method for joining the indexes of the passed objects along each - dimension: - - - "outer": use the union of object indexes - - "inner": use the intersection of object indexes - - "left": use indexes from the first object with each dimension - - "right": use indexes from the last object with each dimension - - "exact": instead of aligning, raise `ValueError` when indexes to be - aligned are not equal - - "override": if indexes are of same size, rewrite indexes to be - those of the first object with that dimension. Indexes for the same - dimension must have the same size in all objects. - - copy : bool, default: True - If ``copy=True``, data in the return values is always copied. If - ``copy=False`` and reindexing is unnecessary, or can be performed with - only slice operations, then the output may share memory with the input. - In either case, new xarray objects are always returned. - indexes : dict-like, optional - Any indexes explicitly provided with the `indexes` argument should be - used in preference to the aligned indexes. - exclude : str, iterable of hashable or None, optional - Dimensions that must be excluded from alignment - fill_value : scalar or dict-like, optional - Value to use for newly missing values. If a dict-like, maps - variable names to fill values. Use a data array's name to - refer to its values. - - Returns - ------- - aligned : tuple of DataArray or Dataset - Tuple of objects with the same type as `*objects` with aligned - coordinates. 
- - - """ - from linopy.expressions import LinearExpression, QuadraticExpression - from linopy.variables import Variable - - finisher: list[partial[Any] | Callable[[Any], Any]] = [] - das: list[Any] = [] - for obj in objects: - if isinstance(obj, LinearExpression | QuadraticExpression): - finisher.append(partial(obj.__class__, model=obj.model)) - das.append(obj.data) - elif isinstance(obj, Variable): - finisher.append( - partial( - obj.__class__, - model=obj.model, - name=obj.data.attrs["name"], - skip_broadcast=True, - ) - ) - das.append(obj.data) - else: - finisher.append(lambda x: x) - das.append(obj) - - exclude = frozenset(exclude).union(HELPER_DIMS) - aligned = xr_align( - *das, - join=join, - copy=copy, - indexes=indexes, - exclude=exclude, - fill_value=fill_value, - ) - return tuple([f(da) for f, da in zip(finisher, aligned)]) - - LocT = TypeVar( "LocT", "Dataset", diff --git a/linopy/expressions.py b/linopy/expressions.py index 673eaba9..916f30e9 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -44,16 +44,14 @@ from types import EllipsisType, NotImplementedType from linopy import constraints, variables +from linopy.alignment import as_dataarray, broadcast_to_coords, fill_missing_coords from linopy.common import ( EmptyDeprecationWrapper, LocIndexer, - as_dataarray, assign_multiindex_safe, - broadcast_to_coords, check_common_keys_values, check_has_nulls, check_has_nulls_polars, - fill_missing_coords, filter_nulls_polars, format_coord, format_single_expression, @@ -2297,7 +2295,7 @@ def as_expression( model : linopy.Model, optional Assigned model, by default None **kwargs : - Keyword arguments passed to `linopy.common.broadcast_to_coords`. + Keyword arguments passed to `linopy.alignment.broadcast_to_coords`. 
Returns ------- diff --git a/linopy/model.py b/linopy/model.py index aa0e5d29..4fea3176 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -27,11 +27,10 @@ from xarray.core.types import T_Chunks from linopy import solvers +from linopy.alignment import as_dataarray, broadcast_to_coords from linopy.common import ( - as_dataarray, assign_multiindex_safe, best_int, - broadcast_to_coords, maybe_replace_signs, replace_by_map, to_path, diff --git a/linopy/variables.py b/linopy/variables.py index 755a3afc..33e3be37 100644 --- a/linopy/variables.py +++ b/linopy/variables.py @@ -31,13 +31,12 @@ from xarray.core.utils import Frozen import linopy.expressions as expressions +from linopy.alignment import as_dataarray, broadcast_to_coords from linopy.common import ( LabelPositionIndex, LocIndexer, VariableLabelIndex, - as_dataarray, assign_multiindex_safe, - broadcast_to_coords, check_has_nulls, check_has_nulls_polars, filter_nulls_polars, diff --git a/test/test_common.py b/test/test_common.py index 692cb910..0fa1c9a1 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -18,17 +18,19 @@ from xarray.testing.assertions import assert_equal from linopy import EvolvingAPIWarning, LinearExpression, Model, Variable -from linopy.common import ( +from linopy.alignment import ( align, as_dataarray, + broadcast_to_coords, + validate_alignment, +) +from linopy.common import ( assign_multiindex_safe, best_int, - broadcast_to_coords, get_dims_with_index_levels, is_constant, iterate_slices, maybe_group_terms_polars, - validate_alignment, ) from linopy.testing import assert_linequal, assert_varequal from linopy.types import CoordsLike @@ -817,7 +819,7 @@ class TestCoordsToDictRules: @staticmethod def _parse(coords: Any, dims: Any = None) -> dict: - from linopy.common import _coords_to_dict + from linopy.alignment import _coords_to_dict return _coords_to_dict(coords, dims=dims) From 4649c813740c2016ec31e84714263f14b33eab64 Mon Sep 17 00:00:00 2001 From: FBumann 
<117816358+FBumann@users.noreply.github.com> Date: Wed, 3 Jun 2026 09:31:30 +0200 Subject: [PATCH 2/5] test: restructure alignment tests into test_alignment.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One class per concept in linopy.alignment, mirroring the module's public surface: - TestAsDataarrayFrom{Pandas,Numpy,Scalar,DataArray} + MultiIndexCoords - TestCoordsToDict (the coords-entry naming rules) - TestBroadcastToCoords (strict=False mechanics) - TestMultiIndexProjection (projection values, deprecation warnings, coverage gaps — the legacy/v1 fork point for #717) - TestStrictMode (strict=True contract) - TestValidateAlignment - TestAlign Shared fixtures (mi_index / mi_coords / by_level1) replace the repeated MultiIndex setup; the pandas dims-naming and numpy labeling tests are consolidated into parametrized tables. test_common.py keeps the utility tests. Full suite count unchanged (3202) — no coverage lost. Co-Authored-By: Claude Opus 4.8 (1M context) --- doc/release_notes.rst | 2 +- test/test_alignment.py | 910 +++++++++++++++++++++++++++++++++++++++ test/test_common.py | 942 +---------------------------------------- 3 files changed, 912 insertions(+), 942 deletions(-) create mode 100644 test/test_alignment.py diff --git a/doc/release_notes.rst b/doc/release_notes.rst index ca582462..f0a26521 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -73,7 +73,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Internal** -* ``linopy.common`` provides two DataArray conversion helpers: ``as_dataarray`` (convert only) and ``broadcast_to_coords`` (convert and broadcast against ``coords``). The latter takes ``strict`` (default ``True``): any mismatch with ``coords`` raises, naming ``label`` in the error; ``strict=False`` passes mismatches through for downstream xarray alignment. 
+* New module ``linopy.alignment`` owns conversion, broadcasting, and alignment of user input against coordinates (moved out of ``linopy.common``): ``as_dataarray`` (convert only), ``broadcast_to_coords`` (convert and broadcast against ``coords``; ``strict=True`` by default raises on any mismatch, naming ``label`` in the error), ``validate_alignment``, and ``align``. * Each ``Solver`` subclass now overrides at most three hooks: ``_build_direct`` (build the native model), ``_run_direct`` (run it), and ``_run_file`` (run the solver on an LP/MPS file). File-only solvers (CBC, GLPK, CPLEX, SCIP, Knitro, COPT, MindOpt) only override ``_run_file``. * New ``ConstraintLabelIndex`` cached on ``Model.constraints`` (mirrors the existing ``Variables.label_index``); ``ConstraintBase`` gains ``active_labels()`` and a ``range`` property; ``CSRConstraint`` exposes ``coords``. * ``linopy.common`` gains ``values_to_lookup_array``; the legacy pandas-based helpers ``series_to_lookup_array`` and ``lookup_vals`` are removed. diff --git a/test/test_alignment.py b/test/test_alignment.py new file mode 100644 index 00000000..e8c778ea --- /dev/null +++ b/test/test_alignment.py @@ -0,0 +1,910 @@ +#!/usr/bin/env python3 +""" +Tests for linopy.alignment — conversion, broadcasting, and validation of +user input against coordinates. 
+ +Organized by the module's public surface: + +- ``TestAsDataarrayFrom*`` — :func:`as_dataarray` (convert only) +- ``TestCoordsToDict`` — the coords-entry naming rules +- ``TestBroadcastToCoords`` — ``broadcast_to_coords(strict=False)`` +- ``TestMultiIndexProjection`` — implicit MI-level projection (values, + deprecation warnings, coverage gaps) — the legacy/v1 fork point +- ``TestStrictMode`` — ``broadcast_to_coords(strict=True)`` +- ``TestValidateAlignment`` — the validation primitive +- ``TestAlign`` — symmetric :func:`align` +""" + +import warnings +from collections.abc import Callable +from typing import Any + +import numpy as np +import pandas as pd +import polars as pl +import pytest +import xarray as xr +from xarray import DataArray +from xarray.testing.assertions import assert_equal + +from linopy import EvolvingAPIWarning, LinearExpression, Model, Variable +from linopy.alignment import ( + _coords_to_dict, + align, + as_dataarray, + broadcast_to_coords, + validate_alignment, +) +from linopy.testing import assert_linequal, assert_varequal +from linopy.types import CoordsLike + +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def mi_index() -> pd.MultiIndex: + """Named (level1, level2) MultiIndex backing the stacked dim 'dim_3'.""" + idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) + idx.name = "dim_3" + return idx + + +@pytest.fixture +def mi_coords(mi_index: pd.MultiIndex) -> xr.Coordinates: + """Coordinates of the stacked MultiIndex dim 'dim_3'.""" + return xr.Coordinates.from_pandas_multiindex(mi_index, "dim_3") + + +@pytest.fixture +def by_level1() -> DataArray: + """A constant indexed by level1 only — a partial level set.""" + return DataArray([10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"]) + + +# 
--------------------------------------------------------------------------- +# as_dataarray — convert only +# --------------------------------------------------------------------------- + + +class TestAsDataarrayFromPandas: + """Series / DataFrame conversion: pandas axis names vs the dims argument.""" + + @pytest.mark.parametrize( + ("index", "dims", "expected_dim"), + [ + pytest.param([0, 1, 2], None, "dim_0", id="default"), + pytest.param(["a", "b", "c"], ["dim1"], "dim1", id="dims-set"), + pytest.param( + pd.Index(["a", "b", "c"], name="dim1"), [], "dim1", id="dims-given" + ), + pytest.param( + pd.Index(["a", "b", "c"], name="dim1"), + ["other"], + "dim1", + id="pandas-name-has-priority", + ), + pytest.param(["a", "b", "c"], [], "dim_0", id="dims-subset"), + pytest.param( + ["a", "b", "c"], ["dim_a", "other"], "dim_a", id="dims-superset" + ), + ], + ) + def test_series_dim_naming( + self, index: Any, dims: list[str] | None, expected_dim: str + ) -> None: + s = pd.Series([1, 2, 3], index=index) + da = as_dataarray(s, dims=dims) if dims is not None else as_dataarray(s) + assert isinstance(da, DataArray) + assert da.dims == (expected_dim,) + assert list(da.coords[expected_dim].values) == list(s.index) + + @pytest.mark.parametrize( + ("index", "columns", "dims", "expected_dims"), + [ + pytest.param([0, 1], ["A", "B"], None, ("dim_0", "dim_1"), id="default"), + pytest.param( + ["a", "b"], + ["A", "B"], + ("dim1", "dim2"), + ("dim1", "dim2"), + id="dims-set", + ), + pytest.param( + pd.Index(["a", "b"], name="dim1"), + pd.Index(["A", "B"], name="dim2"), + [], + ("dim1", "dim2"), + id="dims-given", + ), + pytest.param( + pd.Index(["a", "b"], name="dim1"), + pd.Index(["A", "B"], name="dim2"), + ["other"], + ("dim1", "dim2"), + id="pandas-name-has-priority", + ), + pytest.param( + ["a", "b"], ["A", "B"], [], ("dim_0", "dim_1"), id="dims-subset" + ), + pytest.param( + ["a", "b"], + ["A", "B"], + ["dim_a", "dim_b", "other"], + ("dim_a", "dim_b"), + id="dims-superset", + ), 
+ ], + ) + def test_dataframe_dim_naming( + self, + index: Any, + columns: Any, + dims: Any, + expected_dims: tuple[str, ...], + ) -> None: + df = pd.DataFrame([[1, 2], [3, 4]], index=index, columns=columns) + da = as_dataarray(df, dims=dims) if dims is not None else as_dataarray(df) + assert isinstance(da, DataArray) + assert da.dims == expected_dims + assert list(da.coords[expected_dims[0]].values) == list(df.index) + assert list(da.coords[expected_dims[1]].values) == list(df.columns) + + def test_series_aligned_coords(self) -> None: + """This should not give out a warning even though coords are given.""" + target_dim = "dim_0" + target_index = ["a", "b", "c"] + s = pd.Series([1, 2, 3], index=target_index) + da = as_dataarray(s, coords=[target_index]) + assert isinstance(da, DataArray) + assert da.dims == (target_dim,) + assert list(da.coords[target_dim].values) == target_index + + da = as_dataarray(s, coords={target_dim: target_index}) + assert isinstance(da, DataArray) + assert da.dims == (target_dim,) + assert list(da.coords[target_dim].values) == target_index + + def test_dataframe_aligned_coords(self) -> None: + """This should not give out a warning even though coords are given.""" + target_dims = ("dim_0", "dim_1") + target_index = ["a", "b"] + target_columns = ["A", "B"] + df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) + da = as_dataarray(df, coords=[target_index, target_columns]) + assert isinstance(da, DataArray) + assert da.dims == target_dims + assert list(da.coords[target_dims[0]].values) == target_index + assert list(da.coords[target_dims[1]].values) == target_columns + + coords = dict(zip(target_dims, [target_index, target_columns])) + da = as_dataarray(df, coords=coords) + assert isinstance(da, DataArray) + assert da.dims == target_dims + assert list(da.coords[target_dims[0]].values) == target_index + assert list(da.coords[target_dims[1]].values) == target_columns + + def test_polars_series(self) -> None: + 
target_dim = "dim_0" + target_index = [0, 1, 2] + s = pl.Series([1, 2, 3]) + da = as_dataarray(s) + assert isinstance(da, DataArray) + assert da.dims == (target_dim,) + assert list(da.coords[target_dim].values) == target_index + + +class TestAsDataarrayFromNumpy: + """ndarray conversion: positional labeling from coords / dims.""" + + arr = np.array([[1, 2], [3, 4]]) + + @pytest.mark.parametrize( + ("coords", "dims", "expected"), + [ + pytest.param( + None, None, {"dim_0": [0, 1], "dim_1": [0, 1]}, id="no-coords-no-dims" + ), + pytest.param( + [["a", "b"], ["A", "B"]], + None, + {"dim_0": ["a", "b"], "dim_1": ["A", "B"]}, + id="coords-list", + ), + pytest.param( + [pd.Index(["a", "b"], name="dim1"), pd.Index(["A", "B"], name="dim2")], + None, + {"dim1": ["a", "b"], "dim2": ["A", "B"]}, + id="coords-named-indexes", + ), + pytest.param( + {"dim_0": ["a", "b"], "dim_2": ["A", "B"]}, + None, + {"dim_0": ["a", "b"], "dim_2": ["A", "B"]}, + id="coords-dict", + ), + pytest.param( + [["a", "b"], ["A", "B"]], + ("dim1", "dim2"), + {"dim1": ["a", "b"], "dim2": ["A", "B"]}, + id="coords-list-and-dims", + ), + pytest.param( + [["a", "b"], ["A", "B"]], + ("dim1", "dim2", "dim3"), + {"dim1": ["a", "b"], "dim2": ["A", "B"]}, + id="dims-superset", + ), + pytest.param( + [["a", "b"], ["A", "B"]], + ["dim0"], + {"dim0": ["a", "b"], "dim_1": ["A", "B"]}, + id="dims-subset", + ), + pytest.param( + [pd.Index(["a", "b"], name="dim1"), pd.Index(["A", "B"], name="dim2")], + ("dim1", "dim2"), + {"dim1": ["a", "b"], "dim2": ["A", "B"]}, + id="named-indexes-and-matching-dims", + ), + pytest.param( + {"dim_0": ["a", "b"], "dim_1": ["A", "B"]}, + ("dim_0", "dim_1"), + {"dim_0": ["a", "b"], "dim_1": ["A", "B"]}, + id="coords-dict-and-matching-dims", + ), + ], + ) + def test_labeling(self, coords: Any, dims: Any, expected: dict[str, list]) -> None: + da = as_dataarray(self.arr, coords=coords, dims=dims) + assert isinstance(da, DataArray) + assert da.dims == tuple(expected) + for dim, values in 
expected.items(): + assert list(da.coords[dim]) == values + + def test_named_indexes_conflicting_dims_raise(self) -> None: + coords = [pd.Index(["a", "b"], name="dim1"), pd.Index(["A", "B"], name="dim2")] + with pytest.raises(ValueError): + as_dataarray(self.arr, coords=coords, dims=("dim3", "dim4")) + + def test_extra_coord_entries_are_dropped(self) -> None: + """as_dataarray converts only: dims label the axes, extra coord entries are dropped.""" + target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} + da = as_dataarray(self.arr, coords=target_coords, dims=("dim_0", "dim_1")) + assert da.dims == ("dim_0", "dim_1") + assert list(da.coords["dim_0"].values) == ["a", "b"] + assert "dim_2" not in da.coords + + +class TestAsDataarrayFromScalar: + """Scalar conversion: numbers expand over coords when given.""" + + @pytest.mark.parametrize( + "num", [1, np.float64(1)], ids=["python-int", "np-float64"] + ) + def test_with_dims_and_coords(self, num: Any) -> None: + da = as_dataarray(num, dims=["dim1"], coords=[["a"]]) + assert isinstance(da, DataArray) + assert da.dims == ("dim1",) + assert list(da.coords["dim1"].values) == ["a"] + + def test_default_dims_coords(self) -> None: + da = as_dataarray(1) + assert isinstance(da, DataArray) + assert da.dims == () + assert da.coords == {} + + def test_with_named_index_coords(self) -> None: + da = as_dataarray(1, coords=[pd.RangeIndex(10, name="a")]) + assert isinstance(da, DataArray) + assert da.dims == ("a",) + assert list(da.coords["a"].values) == list(range(10)) + + +class TestAsDataarrayFromDataArray: + """DataArray inputs pass through; unsupported types raise.""" + + da_in = DataArray( + data=[[1, 2], [3, 4]], + dims=["dim1", "dim2"], + coords={"dim1": ["a", "b"], "dim2": ["A", "B"]}, + ) + + @pytest.mark.parametrize( + "kwargs", + [ + pytest.param( + {"dims": ["dim1", "dim2"], "coords": [["a", "b"], ["A", "B"]]}, + id="matching-dims-and-coords", + ), + pytest.param({}, id="default"), + ], + ) + def 
test_passthrough(self, kwargs: dict[str, Any]) -> None: + da_out = as_dataarray(self.da_in, **kwargs) + assert isinstance(da_out, DataArray) + assert da_out.dims == self.da_in.dims + assert list(da_out.coords["dim1"].values) == list( + self.da_in.coords["dim1"].values + ) + assert list(da_out.coords["dim2"].values) == list( + self.da_in.coords["dim2"].values + ) + + def test_unsupported_type_raises(self) -> None: + with pytest.raises(TypeError): + as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) + + def test_does_not_expand_missing_coord_dims(self) -> None: + """as_dataarray converts; only broadcast_to_coords expands missing dims.""" + coords = {"a": [0, 1], "b": [10, 20]} + arr = np.array([1, 2]) + + converted = as_dataarray(arr, coords=coords, dims=["a"]) + assert converted.dims == ("a",) + + broadcast = broadcast_to_coords(arr, coords=coords, dims=["a"], strict=False) + assert broadcast.dims == ("a", "b") + + +class TestAsDataarrayMultiIndexCoords: + """MultiIndex coords inputs: level names must not become extra dims.""" + + station_mi = pd.MultiIndex.from_tuples( + [("a", 1), ("b", 2)], names=["letter", "num"] + ) + + @pytest.mark.parametrize( + ("arr", "expected_values"), + [ + (np.float64(3.0), [3.0, 3.0]), + (3, [3, 3]), + (3.0, [3.0, 3.0]), + (np.array([10.0, 20.0]), [10.0, 20.0]), + ], + ids=["np_number", "python_int", "python_float", "numpy_array"], + ) + def test_input_types(self, arr: object, expected_values: list[float]) -> None: + """Level names in multi-index coords must not be treated as extra dims.""" + source = DataArray( + [1.0, 2.0], coords={"station": self.station_mi}, dims="station" + ) + + da = as_dataarray(arr, coords=source.coords) + + assert da.dims == ("station",) + assert da.shape == (2,) + assert set(da.coords.keys()) == {"station", "letter", "num"} + assert list(da.coords["letter"].values) == ["a", "b"] + assert list(da.coords["num"].values) == [1, 2] + assert da.coords["letter"].dims == ("station",) + assert 
da.coords["num"].dims == ("station",) + assert list(da.values) == expected_values + + @pytest.mark.parametrize( + "coords_factory", + [ + lambda mi: xr.Coordinates.from_pandas_multiindex(mi, "station"), + lambda mi: {"station": mi}, + lambda mi: ( + DataArray([1.0, 2.0], coords={"station": mi}, dims="station").coords + ), + ], + ids=["xarray_Coordinates", "plain_dict", "dataarray_coords"], + ) + def test_coord_input_forms( + self, coords_factory: Callable[[pd.MultiIndex], CoordsLike] + ) -> None: + """Users may pass a MultiIndex via Coordinates, a dict, or another DataArray's coords.""" + coords = coords_factory(self.station_mi) + + da = as_dataarray(3.0, coords=coords) + + assert da.dims == ("station",) + assert da.shape == (2,) + assert set(da.coords.keys()) == {"station", "letter", "num"} + assert da.coords["letter"].dims == ("station",) + assert da.coords["num"].dims == ("station",) + assert (da.values == 3.0).all() + + def test_explicit_dims_win_over_inference(self) -> None: + """Explicit dims must win over any inference from Coordinates.""" + source = DataArray( + [1.0, 2.0], coords={"station": self.station_mi}, dims="station" + ) + + da = as_dataarray(3.0, coords=source.coords, dims=["station"]) + assert da.dims == ("station",) + assert da.shape == (2,) + assert set(da.coords.keys()) == {"station", "letter", "num"} + + +# --------------------------------------------------------------------------- +# _coords_to_dict — the coords-entry naming rules +# --------------------------------------------------------------------------- + + +class TestCoordsToDict: + """ + One test per row of the ``_coords_to_dict`` rules table. + + Each test name states the rule it pins; the assertions show the + expected outcome. Together they form the executable spec of how + sequence-form ``coords`` entries are named. 
+ """ + + @staticmethod + def _parse(coords: Any, dims: Any = None) -> dict: + + return _coords_to_dict(coords, dims=dims) + + # -- container forms --------------------------------------------------- + + def test_mapping_is_returned_as_shallow_dict_copy(self) -> None: + src = {"x": [0, 1, 2], "y": [10, 20]} + result = self._parse(src) + assert result == src + assert result is not src + + def test_xarray_coordinates_keeps_only_dim_entries(self) -> None: + midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + coords = xr.Coordinates.from_pandas_multiindex(midx, "stacked") + result = self._parse(coords) + assert set(result) == {"stacked"} + + # -- pd.Index entries -------------------------------------------------- + + def test_named_pd_index_uses_its_name(self) -> None: + result = self._parse([pd.Index([0, 1, 2], name="x")]) + assert set(result) == {"x"} + + def test_unnamed_pd_index_with_dims_uses_dims(self) -> None: + result = self._parse([pd.Index([0, 1, 2])], dims=["x"]) + assert set(result) == {"x"} + + def test_unnamed_pd_index_without_dims_is_size_only(self) -> None: + # Same as a bare sequence: contributes no dim name; xarray assigns + # ``dim_0`` downstream. 
+ assert self._parse([pd.Index([0, 1, 2])]) == {} + m = Model() + v = m.add_variables(coords=[pd.Index([0, 1, 2])]) + assert v.dims == ("dim_0",) + + # -- pd.MultiIndex entries -------------------------------------------- + + def test_named_multiindex_uses_its_name(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + mi.name = "multi" + result = self._parse([mi]) + assert set(result) == {"multi"} + + def test_unnamed_multiindex_with_dims_uses_dims(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + result = self._parse([mi], dims=["multi"]) + assert set(result) == {"multi"} + assert result["multi"].name == "multi" + assert mi.name is None # caller's MultiIndex not mutated + + def test_unnamed_multiindex_without_dims_raises(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): + self._parse([mi]) + + # -- bare sequence entries -------------------------------------------- + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_with_dims_uses_dims(self, entry: Any) -> None: + result = self._parse([entry], dims=["x"]) + assert set(result) == {"x"} + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_without_dims_is_silently_skipped(self, entry: Any) -> None: + assert self._parse([entry]) == {} + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_without_dims_falls_through_to_xarray_dim_0( + self, entry: Any + ) -> None: + m = Model() + v = m.add_variables(coords=[entry]) + assert v.dims == ("dim_0",) + + # -- unsupported entries 
---------------------------------------------- + + def test_dataarray_entry_raises(self) -> None: + with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): + self._parse([DataArray([0, 1, 2], dims=["x"])]) + + def test_unknown_type_entry_raises(self) -> None: + class Foo: ... + + with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): + self._parse([Foo()]) + + +# --------------------------------------------------------------------------- +# broadcast_to_coords(strict=False) — broadcast mechanics, mismatches pass +# --------------------------------------------------------------------------- + + +class TestBroadcastToCoords: + """strict=False: dims are made to agree; entry mismatches pass through.""" + + def test_preserves_extra_dims(self) -> None: + """Extra dims in the input are not rejected — they broadcast downstream.""" + arr = DataArray( + [[1, 2], [3, 4], [5, 6]], + dims=["a", "t"], + coords={"a": [0, 1, 2], "t": [10, 20]}, + ) + coords = {"a": [0, 1, 2]} + da = broadcast_to_coords(arr, coords=coords, strict=False) + assert set(da.dims) == {"a", "t"} + assert list(da.coords["t"].values) == [10, 20] + + def test_keeps_disjoint_shared_dim_values(self) -> None: + """Different value sets on a shared dim are passed through (xr.align handles).""" + arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) + coords = {"a": [2, 3]} + da = broadcast_to_coords(arr, coords=coords, strict=False) + # No exception, no reindex; downstream alignment intersects. + assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] + + def test_extra_coord_entries_broadcast_in(self) -> None: + """Coords is source of truth: extra coord entries broadcast into the result.""" + target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} + arr = np.array([[1, 2], [3, 4]]) + da = broadcast_to_coords( + arr, coords=target_coords, dims=("dim_0", "dim_1"), strict=False + ) + # dims labels the positional axes; coords adds dim_2 by broadcast. 
+ assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} + assert list(da.coords["dim_0"].values) == ["a", "b"] + assert list(da.coords["dim_2"].values) == ["A", "B"] + + +# --------------------------------------------------------------------------- +# Implicit MultiIndex-level projection — the legacy/v1 fork point +# --------------------------------------------------------------------------- + + +class TestMultiIndexProjection: + """ + Inputs indexed by levels of a stacked MultiIndex dim are projected onto it. + + Implicit projection is deprecated (scenario B, #732/#737): it warns under + both modes today and will raise under the v1 convention. Coverage gaps + raise under strict mode. When #717 lands, the deprecation tests here fork + into legacy (warn) and v1 (raise) variants. + """ + + def test_broadcasts_single_level( + self, mi_coords: xr.Coordinates, by_level1: DataArray + ) -> None: + """ + A constant indexed by one MultiIndex level broadcasts across the MI dim. + + PyPSA multi-investment multiplies an expression over a (period, timestep) + 'snapshot' MultiIndex by a weighting indexed only by 'period'. Each level + combination of the MultiIndex must pick up its level's value. + """ + with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): + da = broadcast_to_coords( + by_level1, coords=mi_coords, dims=["dim_3"], strict=False + ) + + assert da.dims == ("dim_3",) + assert isinstance(da.indexes["dim_3"], pd.MultiIndex) + assert da.sel(dim_3=(1, "a")).item() == 10.0 + assert da.sel(dim_3=(1, "b")).item() == 10.0 + assert da.sel(dim_3=(2, "a")).item() == 20.0 + assert da.sel(dim_3=(2, "b")).item() == 20.0 + + def test_stacks_full_levels(self, mi_coords: xr.Coordinates) -> None: + """ + A constant indexed by all MI level names stacks element-wise into the MI dim. 
+ + PyPSA's storage_weightings is a pandas Series over a (period, timestep) + MultiIndex subset (the last snapshot of each period); it must align onto + the matching level combinations of the 'snapshot' MultiIndex. Combinations + the subset does not cover are left as NaN (broadcast path). + """ + subset = pd.MultiIndex.from_tuples( + [(1, "a"), (2, "b")], names=["level1", "level2"] + ) + weights = pd.Series([10.0, 20.0], index=subset) + + with pytest.warns( + EvolvingAPIWarning, match=r"filling uncovered level combinations" + ): + da = broadcast_to_coords( + weights, coords=mi_coords, dims=["dim_3"], strict=False + ) + + assert da.dims == ("dim_3",) + assert isinstance(da.indexes["dim_3"], pd.MultiIndex) + assert da.sel(dim_3=(1, "a")).item() == 10.0 + assert da.sel(dim_3=(2, "b")).item() == 20.0 + assert np.isnan(da.sel(dim_3=(1, "b")).item()) + assert np.isnan(da.sel(dim_3=(2, "a")).item()) + + def test_full_coverage_is_silent( + self, mi_coords: xr.Coordinates, mi_index: pd.MultiIndex + ) -> None: + """ + Full-level, fully-covering alignment is convention-clean → no warning. + + Aligning an input that reconstructs the whole MultiIndex onto its dim is + equivalent to the input already carrying that dim (future §11), so it must + not emit the EvolvingAPIWarning the partial/gap projections do. + """ + full = pd.Series([1.0, 2.0, 3.0, 4.0], index=mi_index) + + with warnings.catch_warnings(): + warnings.simplefilter("error", EvolvingAPIWarning) + da = broadcast_to_coords( + full, coords=mi_coords, dims=["dim_3"], strict=False + ) + + assert da.dims == ("dim_3",) + assert da.values.tolist() == [1.0, 2.0, 3.0, 4.0] + + def test_expands_missing_mi_dim_keeps_levels(self) -> None: + """ + Broadcasting a missing MultiIndex dim must keep its level coords intact. + + expand_dims drops MultiIndex level coords, leaving a degenerate flat + index that fails to align downstream (PyPSA multi-investment regression). 
+ """ + midx = pd.MultiIndex.from_tuples( + [(2020, 0), (2020, 1), (2030, 0), (2030, 1)], + names=["period", "timestep"], + ) + midx.name = "snapshot" + sc = xr.Coordinates.from_pandas_multiindex(midx, "snapshot") + labels = DataArray( + [[1], [2], [3], [4]], + coords={**sc, "name": ["1"]}, + dims=["snapshot", "name"], + ) + coeff = broadcast_to_coords( + DataArray([1.0], coords={"name": ["1"]}, dims=["name"]), + coords=labels.coords, + dims=labels.dims, + strict=False, + ) + assert set(coeff.xindexes) == {"snapshot", "period", "timestep", "name"} + coeff.reindex_like(labels, fill_value=0) + + def test_ambiguous_level_raises(self) -> None: + """A level name shared by two MI dims cannot be resolved.""" + a = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("shared", "x")) + b = pd.MultiIndex.from_product([[1, 2], ["c", "d"]], names=("shared", "y")) + coords = { + **xr.Coordinates.from_pandas_multiindex(a, "dimA"), + **xr.Coordinates.from_pandas_multiindex(b, "dimB"), + } + arr = DataArray([1.0, 2.0], coords={"shared": [1, 2]}, dims=["shared"]) + + with pytest.raises(ValueError, match=r"shared.*shared by MultiIndex"): + broadcast_to_coords(arr, coords=coords, strict=False) + + def test_missing_level_value_raises(self, mi_coords: xr.Coordinates) -> None: + """A level value absent from the input cannot be broadcast.""" + by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 9]}, dims=["level1"]) + + with pytest.raises(ValueError, match=r"Cannot align level.*is missing"): + broadcast_to_coords( + by_level1, coords=mi_coords, dims=["dim_3"], strict=False + ) + + def test_unrelated_mi_series_still_unstacks(self) -> None: + """A MI Series whose levels match no coords MI dim keeps unstacking.""" + sub = pd.MultiIndex.from_product([["p", "q"], [1, 2]], names=["foo", "bar"]) + series = pd.Series([1.0, 2.0, 3.0, 4.0], index=sub) + + da = broadcast_to_coords(series, coords={"time": [0, 1, 2]}, strict=False) + + assert set(da.dims) == {"time", "foo", "bar"} + + # --- 
strict-mode policy on MI projections (deprecation / gaps) --- + + def test_strict_partial_level_warns( + self, mi_coords: xr.Coordinates, by_level1: DataArray + ) -> None: + """ + Per-level bounds broadcast across the MI dim, with the deprecation warning. + + Scenario B (#732 / #737 discussion): implicit MI-level projection is + deprecated everywhere, including the strict (bounds/mask) path, and will + raise under the v1 convention. + """ + with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): + da = broadcast_to_coords( + by_level1, mi_coords, dims=["dim_3"], label="lower bound" + ) + + assert da.sel(dim_3=(1, "b")).item() == 10.0 + assert da.sel(dim_3=(2, "a")).item() == 20.0 + + def test_strict_rejects_coverage_gap(self, mi_coords: xr.Coordinates) -> None: + """A coverage gap warns on the broadcast rung but raises on the strict rung.""" + subset = pd.MultiIndex.from_tuples( + [(1, "a"), (2, "b")], names=["level1", "level2"] + ) + weights = pd.Series([10.0, 20.0], index=subset) + + with pytest.warns( + EvolvingAPIWarning, match=r"filling uncovered level combinations" + ): + broadcast_to_coords(weights, coords=mi_coords, dims=["dim_3"], strict=False) + + with pytest.raises(ValueError, match=r"no value for .* level combination"): + broadcast_to_coords(weights, mi_coords, dims=["dim_3"], label="lower bound") + + def test_strict_rejects_unnamed_mi_mismatch(self) -> None: + """ + A MultiIndex input with unnamed levels cannot be projected by level name, + so it keeps its own index under the coords dim. The strict rung must still + reject it when its level combinations don't cover coords, just as the + named-level coverage-gap case does. 
+ """ + idx = pd.MultiIndex.from_product([[2020, 2030], ["t1", "t2"]], names=("p", "t")) + idx.name = "snapshot" + coords = xr.Coordinates.from_pandas_multiindex(idx, "snapshot") + sparse_unnamed = pd.Series({(2020, "t1"): 1.0, (2030, "t2"): 2.0}) + + with pytest.raises(ValueError, match=r"MultiIndex for dimension 'snapshot'"): + broadcast_to_coords( + sparse_unnamed, coords, dims=["snapshot"], label="lower bound" + ) + + +# --------------------------------------------------------------------------- +# broadcast_to_coords(strict=True) — the contract +# --------------------------------------------------------------------------- + + +class TestStrictMode: + """strict=True: anything broadcasting can't resolve raises, naming label.""" + + def test_extra_dims_pass_loose_fail_strict(self) -> None: + """Extra dims pass through the broadcast rung but fail the strict rung.""" + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "t"], coords={"a": [0, 1], "t": [10, 20]} + ) + coords = {"a": [0, 1]} + + da = broadcast_to_coords(arr, coords=coords, strict=False) + assert set(da.dims) == {"a", "t"} + + with pytest.raises(ValueError, match=r"not declared in coords"): + broadcast_to_coords(arr, coords, label="lower bound") + + def test_requires_label(self) -> None: + """strict=True without label raises: errors must name their subject.""" + with pytest.raises(TypeError, match=r"requires `label`"): + broadcast_to_coords(np.array([1, 2]), {"x": [0, 1]}) # type: ignore[call-overload] + + def test_wraps_conversion_errors(self) -> None: + with pytest.raises(ValueError, match=r"lower bound could not be aligned"): + broadcast_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") + + def test_preserves_type_errors(self) -> None: + """Unsupported input types stay TypeError (don't become ValueError).""" + with pytest.raises(TypeError, match=r"lower bound could not be aligned"): + broadcast_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") + + def 
test_does_not_relabel_coords_errors(self) -> None: + """Coords-side TypeError carries its own message, not the value label.""" + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): + broadcast_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound") + + +# --------------------------------------------------------------------------- +# validate_alignment — the validation primitive +# --------------------------------------------------------------------------- + + +class TestValidateAlignment: + """Raise when arr is incompatible with coords; no-op otherwise.""" + + def test_rejects_extra_dims(self) -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(arr, {"a": [0, 1]}) + + def test_rejects_value_mismatch(self) -> None: + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + with pytest.raises(ValueError, match="do not match coords"): + validate_alignment(arr, {"a": [10, 20, 30]}) + + def test_allows_subset_dims(self) -> None: + """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + validate_alignment(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise + + def test_unnamed_coords_and_dims(self) -> None: + """coords=[[...]], dims=[...] 
enforces the same contract as a named mapping.""" + arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) + validate_alignment(arr, [[0, 1, 2]], dims=["x"]) # no raise + + bad = DataArray( + [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(bad, [[0, 1]], dims=["x"]) + + def test_label_in_error(self) -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): + validate_alignment(arr, {"a": [0, 1]}, label="lower bound") + + +# --------------------------------------------------------------------------- +# align — the symmetric counterpart (wraps xarray.align) +# --------------------------------------------------------------------------- + + +class TestAlign: + """align() conforms multiple linopy / xarray objects to common coords.""" + + def test_align(self, x: Variable, u: Variable) -> None: + alpha = xr.DataArray([1, 2], [[1, 2]]) + beta = xr.DataArray( + [1, 2, 3], + [ + ( + "dim_3", + pd.MultiIndex.from_tuples( + [(1, "b"), (2, "b"), (1, "c")], names=["level1", "level2"] + ), + ) + ], + ) + + # inner join + x_obs, alpha_obs = align(x, alpha) + assert isinstance(x_obs, Variable) + assert x_obs.shape == alpha_obs.shape == (1,) + assert_varequal(x_obs, x.loc[[1]]) + + # left-join + x_obs, alpha_obs = align(x, alpha, join="left") + assert x_obs.shape == alpha_obs.shape == (2,) + assert isinstance(x_obs, Variable) + assert_varequal(x_obs, x) + assert_equal(alpha_obs, DataArray([np.nan, 1], [[0, 1]])) + + # multiindex + beta_obs, u_obs = align(beta, u) + assert u_obs.shape == beta_obs.shape == (2,) + assert isinstance(u_obs, Variable) + assert_varequal(u_obs, u.loc[[(1, "b"), (2, "b")]]) + assert_equal(beta_obs, beta.loc[[(1, "b"), (2, "b")]]) + + # with linear expression + expr = 20 * x + x_obs, expr_obs, alpha_obs = align(x, 
expr, alpha) + assert x_obs.shape == alpha_obs.shape == (1,) + assert expr_obs.shape == (1, 1) # _term dim + assert isinstance(expr_obs, LinearExpression) + assert_linequal(expr_obs, expr.loc[[1]]) diff --git a/test/test_common.py b/test/test_common.py index 0fa1c9a1..c2f78996 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -5,25 +5,13 @@ @author: fabian """ -import warnings -from collections.abc import Callable -from typing import Any - import numpy as np import pandas as pd import polars as pl import pytest import xarray as xr -from xarray import DataArray -from xarray.testing.assertions import assert_equal -from linopy import EvolvingAPIWarning, LinearExpression, Model, Variable -from linopy.alignment import ( - align, - as_dataarray, - broadcast_to_coords, - validate_alignment, -) +from linopy import Model from linopy.common import ( assign_multiindex_safe, best_int, @@ -32,891 +20,6 @@ iterate_slices, maybe_group_terms_polars, ) -from linopy.testing import assert_linequal, assert_varequal -from linopy.types import CoordsLike - - -def test_as_dataarray_with_series_dims_default() -> None: - target_dim = "dim_0" - target_index = [0, 1, 2] - s = pd.Series([1, 2, 3]) - da = as_dataarray(s) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_series_dims_set() -> None: - target_dim = "dim1" - target_index = ["a", "b", "c"] - s = pd.Series([1, 2, 3], index=target_index) - dims = [target_dim] - da = as_dataarray(s, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_series_dims_given() -> None: - target_dim = "dim1" - target_index = ["a", "b", "c"] - index = pd.Index(target_index, name=target_dim) - s = pd.Series([1, 2, 3], index=index) - dims: list[str] = [] - da = as_dataarray(s, dims=dims) - assert isinstance(da, DataArray) - 
assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_series_dims_priority() -> None: - """The dimension name from the pandas object should have priority.""" - target_dim = "dim1" - target_index = ["a", "b", "c"] - index = pd.Index(target_index, name=target_dim) - s = pd.Series([1, 2, 3], index=index) - dims = ["other"] - da = as_dataarray(s, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_series_dims_subset() -> None: - target_dim = "dim_0" - target_index = ["a", "b", "c"] - s = pd.Series([1, 2, 3], index=target_index) - dims: list[str] = [] - da = as_dataarray(s, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_series_dims_superset() -> None: - target_dim = "dim_a" - target_index = ["a", "b", "c"] - s = pd.Series([1, 2, 3], index=target_index) - dims = [target_dim, "other"] - da = as_dataarray(s, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_series_aligned_coords() -> None: - """This should not give out a warning even though coords are given.""" - target_dim = "dim_0" - target_index = ["a", "b", "c"] - s = pd.Series([1, 2, 3], index=target_index) - da = as_dataarray(s, coords=[target_index]) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - da = as_dataarray(s, coords={target_dim: target_index}) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_with_pl_series_dims_default() -> None: - target_dim = "dim_0" - target_index = [0, 1, 2] - s = 
pl.Series([1, 2, 3]) - da = as_dataarray(s) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - -def test_as_dataarray_dataframe_dims_default() -> None: - target_dims = ("dim_0", "dim_1") - target_index = [0, 1] - target_columns = ["A", "B"] - df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) - da = as_dataarray(df) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_dataframe_dims_set() -> None: - target_dims = ("dim1", "dim2") - target_index = ["a", "b"] - target_columns = ["A", "B"] - df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) - da = as_dataarray(df, dims=target_dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_dataframe_dims_given() -> None: - target_dims = ("dim1", "dim2") - target_index = ["a", "b"] - target_columns = ["A", "B"] - index = pd.Index(target_index, name=target_dims[0]) - columns = pd.Index(target_columns, name=target_dims[1]) - df = pd.DataFrame([[1, 2], [3, 4]], index=index, columns=columns) - dims: list[str] = [] - da = as_dataarray(df, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_dataframe_dims_priority() -> None: - """The dimension name from the pandas object should have priority.""" - target_dims = ("dim1", "dim2") - target_index = ["a", "b"] - target_columns = ["A", "B"] - index = pd.Index(target_index, name=target_dims[0]) - columns = pd.Index(target_columns, 
name=target_dims[1]) - df = pd.DataFrame([[1, 2], [3, 4]], index=index, columns=columns) - dims = ["other"] - da = as_dataarray(df, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_dataframe_dims_subset() -> None: - target_dims = ("dim_0", "dim_1") - target_index = ["a", "b"] - target_columns = ["A", "B"] - df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) - dims: list[str] = [] - da = as_dataarray(df, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_dataframe_dims_superset() -> None: - target_dims = ("dim_a", "dim_b") - target_index = ["a", "b"] - target_columns = ["A", "B"] - df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) - dims = [*target_dims, "other"] - da = as_dataarray(df, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_dataframe_aligned_coords() -> None: - """This should not give out a warning even though coords are given.""" - target_dims = ("dim_0", "dim_1") - target_index = ["a", "b"] - target_columns = ["A", "B"] - df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) - da = as_dataarray(df, coords=[target_index, target_columns]) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - coords = dict(zip(target_dims, [target_index, target_columns])) - da = as_dataarray(df, 
coords=coords) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns - - -def test_as_dataarray_with_ndarray_no_coords_no_dims() -> None: - target_dims = ("dim_0", "dim_1") - target_coords = [[0, 1], [0, 1]] - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == target_coords[i] - - -def test_as_dataarray_with_ndarray_coords_list_no_dims() -> None: - target_dims = ("dim_0", "dim_1") - target_coords = [["a", "b"], ["A", "B"]] - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == target_coords[i] - - -def test_as_dataarray_with_ndarray_coords_indexes_no_dims() -> None: - target_dims = ("dim1", "dim2") - target_coords = [ - pd.Index(["a", "b"], name="dim1"), - pd.Index(["A", "B"], name="dim2"), - ] - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == list(target_coords[i]) - - -def test_as_dataarray_with_ndarray_coords_dict_set_no_dims() -> None: - """If no dims are given and coords are a dict, the keys of the dict should be used as dims.""" - target_dims = ("dim_0", "dim_2") - target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for dim in target_dims: - assert list(da.coords[dim]) == target_coords[dim] - - -def test_as_dataarray_with_ndarray_coords_list_dims() -> None: - target_dims = ("dim1", 
"dim2") - target_coords = [["a", "b"], ["A", "B"]] - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == target_coords[i] - - -def test_as_dataarray_with_ndarray_coords_list_dims_superset() -> None: - target_dims = ("dim1", "dim2") - target_coords = [["a", "b"], ["A", "B"]] - arr = np.array([[1, 2], [3, 4]]) - dims = [*target_dims, "dim3"] - da = as_dataarray(arr, coords=target_coords, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == target_coords[i] - - -def test_as_dataarray_with_ndarray_coords_list_dims_subset() -> None: - target_dims = ("dim0", "dim_1") - target_coords = [["a", "b"], ["A", "B"]] - arr = np.array([[1, 2], [3, 4]]) - dims = ["dim0"] - da = as_dataarray(arr, coords=target_coords, dims=dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == target_coords[i] - - -def test_as_dataarray_with_ndarray_coords_indexes_dims_aligned() -> None: - target_dims = ("dim1", "dim2") - target_coords = [ - pd.Index(["a", "b"], name="dim1"), - pd.Index(["A", "B"], name="dim2"), - ] - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for i, dim in enumerate(target_dims): - assert list(da.coords[dim]) == list(target_coords[i]) - - -def test_as_dataarray_with_ndarray_coords_indexes_dims_not_aligned() -> None: - target_dims = ("dim3", "dim4") - target_coords = [ - pd.Index(["a", "b"], name="dim1"), - pd.Index(["A", "B"], name="dim2"), - ] - arr = np.array([[1, 2], [3, 4]]) - with pytest.raises(ValueError): - as_dataarray(arr, coords=target_coords, dims=target_dims) - - -def 
test_as_dataarray_with_ndarray_coords_dict_dims_aligned() -> None: - target_dims = ("dim_0", "dim_1") - target_coords = {"dim_0": ["a", "b"], "dim_1": ["A", "B"]} - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert isinstance(da, DataArray) - assert da.dims == target_dims - for dim in target_dims: - assert list(da.coords[dim]) == target_coords[dim] - - -def test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned() -> None: - """as_dataarray converts only: dims label the axes, extra coord entries are dropped.""" - target_dims = ("dim_0", "dim_1") - target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} - arr = np.array([[1, 2], [3, 4]]) - da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert da.dims == target_dims - assert list(da.coords["dim_0"].values) == ["a", "b"] - assert "dim_2" not in da.coords - - -def test_broadcast_to_coords_with_ndarray_coords_dict_set_dims_not_aligned() -> None: - """Coords is source of truth: extra coord entries broadcast into the result.""" - target_dims = ("dim_0", "dim_1") - target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} - arr = np.array([[1, 2], [3, 4]]) - da = broadcast_to_coords(arr, coords=target_coords, dims=target_dims, strict=False) - # dims labels the positional axes; coords adds dim_2 by broadcast. 
- assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} - assert list(da.coords["dim_0"].values) == ["a", "b"] - assert list(da.coords["dim_2"].values) == ["A", "B"] - - -def test_as_dataarray_with_number() -> None: - num = 1 - da = as_dataarray(num, dims=["dim1"], coords=[["a"]]) - assert isinstance(da, DataArray) - assert da.dims == ("dim1",) - assert list(da.coords["dim1"].values) == ["a"] - - -def test_as_dataarray_with_np_number() -> None: - num = np.float64(1) - da = as_dataarray(num, dims=["dim1"], coords=[["a"]]) - assert isinstance(da, DataArray) - assert da.dims == ("dim1",) - assert list(da.coords["dim1"].values) == ["a"] - - -def test_as_dataarray_with_number_default_dims_coords() -> None: - num = 1 - da = as_dataarray(num) - assert isinstance(da, DataArray) - assert da.dims == () - assert da.coords == {} - - -def test_as_dataarray_with_number_and_coords() -> None: - num = 1 - da = as_dataarray(num, coords=[pd.RangeIndex(10, name="a")]) - assert isinstance(da, DataArray) - assert da.dims == ("a",) - assert list(da.coords["a"].values) == list(range(10)) - - -@pytest.mark.parametrize( - ("arr", "expected_values"), - [ - (np.float64(3.0), [3.0, 3.0]), - (3, [3, 3]), - (3.0, [3.0, 3.0]), - (np.array([10.0, 20.0]), [10.0, 20.0]), - ], - ids=["np_number", "python_int", "python_float", "numpy_array"], -) -def test_as_dataarray_with_multiindex_coords( - arr: object, expected_values: list[float] -) -> None: - """Level names in multi-index coords must not be treated as extra dims.""" - mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["letter", "num"]) - source = DataArray([1.0, 2.0], coords={"station": mi}, dims="station") - - da = as_dataarray(arr, coords=source.coords) - - assert da.dims == ("station",) - assert da.shape == (2,) - assert set(da.coords.keys()) == {"station", "letter", "num"} - assert list(da.coords["letter"].values) == ["a", "b"] - assert list(da.coords["num"].values) == [1, 2] - assert da.coords["letter"].dims == ("station",) - assert 
da.coords["num"].dims == ("station",) - assert list(da.values) == expected_values - - -@pytest.mark.parametrize( - "coords_factory", - [ - lambda mi: xr.Coordinates.from_pandas_multiindex(mi, "station"), - lambda mi: {"station": mi}, - lambda mi: DataArray([1.0, 2.0], coords={"station": mi}, dims="station").coords, - ], - ids=["xarray_Coordinates", "plain_dict", "dataarray_coords"], -) -def test_as_dataarray_with_various_multiindex_coord_inputs( - coords_factory: Callable[[pd.MultiIndex], CoordsLike], -) -> None: - """Users may pass a MultiIndex via Coordinates, a dict, or another DataArray's coords.""" - mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["letter", "num"]) - coords = coords_factory(mi) - - da = as_dataarray(3.0, coords=coords) - - assert da.dims == ("station",) - assert da.shape == (2,) - assert set(da.coords.keys()) == {"station", "letter", "num"} - assert da.coords["letter"].dims == ("station",) - assert da.coords["num"].dims == ("station",) - assert (da.values == 3.0).all() - - -def test_as_dataarray_with_scalar_and_explicit_dims_over_multiindex_coords() -> None: - """Explicit dims must win over any inference from Coordinates.""" - mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["letter", "num"]) - source = DataArray([1.0, 2.0], coords={"station": mi}, dims="station") - - da = as_dataarray(3.0, coords=source.coords, dims=["station"]) - assert da.dims == ("station",) - assert da.shape == (2,) - assert set(da.coords.keys()) == {"station", "letter", "num"} - - -def test_as_dataarray_with_dataarray() -> None: - da_in = DataArray( - data=[[1, 2], [3, 4]], - dims=["dim1", "dim2"], - coords={"dim1": ["a", "b"], "dim2": ["A", "B"]}, - ) - da_out = as_dataarray(da_in, dims=["dim1", "dim2"], coords=[["a", "b"], ["A", "B"]]) - assert isinstance(da_out, DataArray) - assert da_out.dims == da_in.dims - assert list(da_out.coords["dim1"].values) == list(da_in.coords["dim1"].values) - assert list(da_out.coords["dim2"].values) == 
list(da_in.coords["dim2"].values) - - -def test_as_dataarray_with_dataarray_default_dims_coords() -> None: - da_in = DataArray( - data=[[1, 2], [3, 4]], - dims=["dim1", "dim2"], - coords={"dim1": ["a", "b"], "dim2": ["A", "B"]}, - ) - da_out = as_dataarray(da_in) - assert isinstance(da_out, DataArray) - assert da_out.dims == da_in.dims - assert list(da_out.coords["dim1"].values) == list(da_in.coords["dim1"].values) - assert list(da_out.coords["dim2"].values) == list(da_in.coords["dim2"].values) - - -def test_as_dataarray_with_unsupported_type() -> None: - with pytest.raises(TypeError): - as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) - - -def test_broadcast_to_coords_preserves_extra_dims() -> None: - """Extra dims in the input are not rejected — they broadcast downstream.""" - arr = DataArray( - [[1, 2], [3, 4], [5, 6]], - dims=["a", "t"], - coords={"a": [0, 1, 2], "t": [10, 20]}, - ) - coords = {"a": [0, 1, 2]} - da = broadcast_to_coords(arr, coords=coords, strict=False) - assert set(da.dims) == {"a", "t"} - assert list(da.coords["t"].values) == [10, 20] - - -def test_broadcast_to_coords_keeps_disjoint_shared_dim_values() -> None: - """Different value sets on a shared dim are passed through (xr.align handles).""" - arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) - coords = {"a": [2, 3]} - da = broadcast_to_coords(arr, coords=coords, strict=False) - # No exception, no reindex; downstream alignment intersects. - assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] - - -def test_broadcast_to_coords_expands_missing_multiindex_dim_keeps_levels() -> None: - """ - Broadcasting a missing MultiIndex dim must keep its level coords intact. - - expand_dims drops MultiIndex level coords, leaving a degenerate flat - index that fails to align downstream (PyPSA multi-investment regression). 
- """ - midx = pd.MultiIndex.from_tuples( - [(2020, 0), (2020, 1), (2030, 0), (2030, 1)], - names=["period", "timestep"], - ) - midx.name = "snapshot" - sc = xr.Coordinates.from_pandas_multiindex(midx, "snapshot") - labels = DataArray( - [[1], [2], [3], [4]], coords={**sc, "name": ["1"]}, dims=["snapshot", "name"] - ) - coeff = broadcast_to_coords( - DataArray([1.0], coords={"name": ["1"]}, dims=["name"]), - coords=labels.coords, - dims=labels.dims, - strict=False, - ) - assert set(coeff.xindexes) == {"snapshot", "period", "timestep", "name"} - coeff.reindex_like(labels, fill_value=0) - - -def test_broadcast_to_coords_broadcasts_single_multiindex_level() -> None: - """ - A constant indexed by one MultiIndex level broadcasts across the MI dim. - - PyPSA multi-investment multiplies an expression over a (period, timestep) - 'snapshot' MultiIndex by a weighting indexed only by 'period'. Each entry - of the MultiIndex must pick up its level's value. - """ - idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) - idx.name = "dim_3" - coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") - by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"]) - - with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): - da = broadcast_to_coords(by_level1, coords=coords, dims=["dim_3"], strict=False) - - assert da.dims == ("dim_3",) - assert isinstance(da.indexes["dim_3"], pd.MultiIndex) - assert da.sel(dim_3=(1, "a")).item() == 10.0 - assert da.sel(dim_3=(1, "b")).item() == 10.0 - assert da.sel(dim_3=(2, "a")).item() == 20.0 - assert da.sel(dim_3=(2, "b")).item() == 20.0 - - -def test_broadcast_to_coords_stacks_full_multiindex_levels() -> None: - """ - A constant indexed by all MI level names stacks element-wise into the MI dim. 
- - PyPSA's storage_weightings is a pandas Series over a (period, timestep) - MultiIndex subset (the last snapshot of each period); it must align onto - the matching level combinations of the 'snapshot' MultiIndex. Combinations the subset does - not cover are left as NaN (broadcast path). - """ - idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) - idx.name = "dim_3" - coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") - subset = pd.MultiIndex.from_tuples([(1, "a"), (2, "b")], names=["level1", "level2"]) - weights = pd.Series([10.0, 20.0], index=subset) - - with pytest.warns( - EvolvingAPIWarning, match=r"filling uncovered level combinations" - ): - da = broadcast_to_coords(weights, coords=coords, dims=["dim_3"], strict=False) - - assert da.dims == ("dim_3",) - assert isinstance(da.indexes["dim_3"], pd.MultiIndex) - assert da.sel(dim_3=(1, "a")).item() == 10.0 - assert da.sel(dim_3=(2, "b")).item() == 20.0 - assert np.isnan(da.sel(dim_3=(1, "b")).item()) - assert np.isnan(da.sel(dim_3=(2, "a")).item()) - - -def test_broadcast_to_coords_full_multiindex_full_coverage_is_silent() -> None: - """ - Full-level, fully-covering alignment is convention-clean → no warning. - - Aligning an input that reconstructs the whole MultiIndex onto its dim is - equivalent to the input already carrying that dim (future §11), so it must - not emit the EvolvingAPIWarning the partial/gap projections do. 
- """ - idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) - idx.name = "dim_3" - coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") - full = pd.Series([1.0, 2.0, 3.0, 4.0], index=idx) - - with warnings.catch_warnings(): - warnings.simplefilter("error", EvolvingAPIWarning) - da = broadcast_to_coords(full, coords=coords, dims=["dim_3"], strict=False) - - assert da.dims == ("dim_3",) - assert da.values.tolist() == [1.0, 2.0, 3.0, 4.0] - - -def test_broadcast_to_coords_level_projection_ambiguous_raises() -> None: - """A level name shared by two MI dims cannot be resolved.""" - a = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("shared", "x")) - b = pd.MultiIndex.from_product([[1, 2], ["c", "d"]], names=("shared", "y")) - coords = { - **xr.Coordinates.from_pandas_multiindex(a, "dimA"), - **xr.Coordinates.from_pandas_multiindex(b, "dimB"), - } - arr = DataArray([1.0, 2.0], coords={"shared": [1, 2]}, dims=["shared"]) - - with pytest.raises(ValueError, match=r"shared.*shared by MultiIndex"): - broadcast_to_coords(arr, coords=coords, strict=False) - - -def test_broadcast_to_coords_level_projection_missing_value_raises() -> None: - """A level value absent from the input cannot be broadcast.""" - idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) - idx.name = "dim_3" - coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") - by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 9]}, dims=["level1"]) - - with pytest.raises(ValueError, match=r"Cannot align level.*is missing"): - broadcast_to_coords(by_level1, coords=coords, dims=["dim_3"], strict=False) - - -def test_broadcast_to_coords_unrelated_multiindex_series_still_unstacks() -> None: - """A MI Series whose levels match no coords MI dim keeps unstacking.""" - sub = pd.MultiIndex.from_product([["p", "q"], [1, 2]], names=["foo", "bar"]) - series = pd.Series([1.0, 2.0, 3.0, 4.0], index=sub) - - da = broadcast_to_coords(series, 
coords={"time": [0, 1, 2]}, strict=False) - - assert set(da.dims) == {"time", "foo", "bar"} - - -# --------------------------------------------------------------------------- -# Strictness: as_dataarray (convert) ⊂ broadcast_to_coords(strict=False) ⊂ broadcast_to_coords(strict=True) -# --------------------------------------------------------------------------- - - -def test_as_dataarray_does_not_expand_missing_coord_dims() -> None: - """as_dataarray converts; only broadcast_to_coords expands missing dims.""" - coords = {"a": [0, 1], "b": [10, 20]} - arr = np.array([1, 2]) - - converted = as_dataarray(arr, coords=coords, dims=["a"]) - assert converted.dims == ("a",) - - broadcast = broadcast_to_coords(arr, coords=coords, dims=["a"], strict=False) - assert broadcast.dims == ("a", "b") - - -def test_extra_dims_pass_broadcast_rung_fail_strict_rung() -> None: - """Extra dims pass through the broadcast rung but fail the strict rung.""" - arr = DataArray( - [[1, 2], [3, 4]], dims=["a", "t"], coords={"a": [0, 1], "t": [10, 20]} - ) - coords = {"a": [0, 1]} - - da = broadcast_to_coords(arr, coords=coords, strict=False) - assert set(da.dims) == {"a", "t"} - - with pytest.raises(ValueError, match=r"not declared in coords"): - broadcast_to_coords(arr, coords, label="lower bound") - - -def test_broadcast_to_coords_rejects_multiindex_coverage_gap() -> None: - """A coverage gap warns on the broadcast rung but raises on the strict rung.""" - idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) - idx.name = "dim_3" - coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") - subset = pd.MultiIndex.from_tuples([(1, "a"), (2, "b")], names=["level1", "level2"]) - weights = pd.Series([10.0, 20.0], index=subset) - - with pytest.warns( - EvolvingAPIWarning, match=r"filling uncovered level combinations" - ): - broadcast_to_coords(weights, coords=coords, dims=["dim_3"], strict=False) - - with pytest.raises(ValueError, match=r"no value for .* level 
combination"): - broadcast_to_coords(weights, coords, dims=["dim_3"], label="lower bound") - - -def test_broadcast_to_coords_rejects_unnamed_multiindex_mismatch() -> None: - """ - A MultiIndex input with unnamed levels cannot be projected by level name, - so it keeps its own index under the coords dim. The strict rung must still - reject it when its level combinations don't cover coords, just as the - named-level coverage-gap case does. - """ - idx = pd.MultiIndex.from_product([[2020, 2030], ["t1", "t2"]], names=("p", "t")) - idx.name = "snapshot" - coords = xr.Coordinates.from_pandas_multiindex(idx, "snapshot") - sparse_unnamed = pd.Series({(2020, "t1"): 1.0, (2030, "t2"): 2.0}) - - with pytest.raises(ValueError, match=r"MultiIndex for dimension 'snapshot'"): - broadcast_to_coords( - sparse_unnamed, coords, dims=["snapshot"], label="lower bound" - ) - - -def test_broadcast_to_coords_strict_partial_level_warns() -> None: - """ - Per-level bounds broadcast across the MI dim, with the deprecation warning. - - Scenario B (#732 / #737 discussion): implicit MI-level projection is - deprecated everywhere, including the strict (bounds/mask) path, and will - raise under the v1 convention. 
- """ - idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) - idx.name = "dim_3" - coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") - by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"]) - - with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): - da = broadcast_to_coords(by_level1, coords, dims=["dim_3"], label="lower bound") - - assert da.sel(dim_3=(1, "b")).item() == 10.0 - assert da.sel(dim_3=(2, "a")).item() == 20.0 - - -def test_validate_alignment_rejects_extra_dims() -> None: - arr = DataArray( - [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} - ) - with pytest.raises(ValueError, match=r"not declared in coords"): - validate_alignment(arr, {"a": [0, 1]}) - - -def test_validate_alignment_rejects_value_mismatch() -> None: - arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) - with pytest.raises(ValueError, match="do not match coords"): - validate_alignment(arr, {"a": [10, 20, 30]}) - - -def test_validate_alignment_allows_subset_dims() -> None: - """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" - arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) - validate_alignment(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise - - -def test_validate_alignment_unnamed_coords_and_dims() -> None: - """coords=[[...]], dims=[...] 
enforces the same contract as a named mapping.""" - arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) - validate_alignment(arr, [[0, 1, 2]], dims=["x"]) # no raise - - bad = DataArray( - [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} - ) - with pytest.raises(ValueError, match=r"not declared in coords"): - validate_alignment(bad, [[0, 1]], dims=["x"]) - - -def test_validate_alignment_label_in_error() -> None: - arr = DataArray( - [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} - ) - with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): - validate_alignment(arr, {"a": [0, 1]}, label="lower bound") - - -def test_broadcast_to_coords_strict_requires_label() -> None: - """strict=True without label raises: errors must name their subject.""" - with pytest.raises(TypeError, match=r"requires `label`"): - broadcast_to_coords(np.array([1, 2]), {"x": [0, 1]}) # type: ignore[call-overload] - - -def test_broadcast_to_coords_wraps_conversion_errors() -> None: - with pytest.raises(ValueError, match=r"lower bound could not be aligned"): - broadcast_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") - - -def test_broadcast_to_coords_preserves_type_errors() -> None: - """Unsupported input types stay TypeError (don't become ValueError).""" - with pytest.raises(TypeError, match=r"lower bound could not be aligned"): - broadcast_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") - - -def test_broadcast_to_coords_does_not_relabel_coords_errors() -> None: - """Coords-side TypeError carries its own message, not the value label.""" - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): - broadcast_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound") - - -class TestCoordsToDictRules: - """ - One test per row of the ``_coords_to_dict`` rules table. 
- - Each test name states the rule it pins; the assertions show the - expected outcome. Together they form the executable spec of how - sequence-form ``coords`` entries are named. - """ - - @staticmethod - def _parse(coords: Any, dims: Any = None) -> dict: - from linopy.alignment import _coords_to_dict - - return _coords_to_dict(coords, dims=dims) - - # -- container forms --------------------------------------------------- - - def test_mapping_is_returned_as_shallow_dict_copy(self) -> None: - src = {"x": [0, 1, 2], "y": [10, 20]} - result = self._parse(src) - assert result == src - assert result is not src - - def test_xarray_coordinates_keeps_only_dim_entries(self) -> None: - midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - coords = xr.Coordinates.from_pandas_multiindex(midx, "stacked") - result = self._parse(coords) - assert set(result) == {"stacked"} - - # -- pd.Index entries -------------------------------------------------- - - def test_named_pd_index_uses_its_name(self) -> None: - result = self._parse([pd.Index([0, 1, 2], name="x")]) - assert set(result) == {"x"} - - def test_unnamed_pd_index_with_dims_uses_dims(self) -> None: - result = self._parse([pd.Index([0, 1, 2])], dims=["x"]) - assert set(result) == {"x"} - - def test_unnamed_pd_index_without_dims_is_size_only(self) -> None: - # Same as a bare sequence: contributes no dim name; xarray assigns - # ``dim_0`` downstream. 
- assert self._parse([pd.Index([0, 1, 2])]) == {} - m = Model() - v = m.add_variables(coords=[pd.Index([0, 1, 2])]) - assert v.dims == ("dim_0",) - - # -- pd.MultiIndex entries -------------------------------------------- - - def test_named_multiindex_uses_its_name(self) -> None: - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - mi.name = "multi" - result = self._parse([mi]) - assert set(result) == {"multi"} - - def test_unnamed_multiindex_with_dims_uses_dims(self) -> None: - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - result = self._parse([mi], dims=["multi"]) - assert set(result) == {"multi"} - assert result["multi"].name == "multi" - assert mi.name is None # caller's MultiIndex not mutated - - def test_unnamed_multiindex_without_dims_raises(self) -> None: - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): - self._parse([mi]) - - # -- bare sequence entries -------------------------------------------- - - @pytest.mark.parametrize( - "entry", - [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], - ids=["list", "tuple", "range", "ndarray"], - ) - def test_bare_sequence_with_dims_uses_dims(self, entry: Any) -> None: - result = self._parse([entry], dims=["x"]) - assert set(result) == {"x"} - - @pytest.mark.parametrize( - "entry", - [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], - ids=["list", "tuple", "range", "ndarray"], - ) - def test_bare_sequence_without_dims_is_silently_skipped(self, entry: Any) -> None: - assert self._parse([entry]) == {} - - @pytest.mark.parametrize( - "entry", - [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], - ids=["list", "tuple", "range", "ndarray"], - ) - def test_bare_sequence_without_dims_falls_through_to_xarray_dim_0( - self, entry: Any - ) -> None: - m = Model() - v = m.add_variables(coords=[entry]) - assert v.dims == ("dim_0",) - - # -- unsupported entries 
---------------------------------------------- - - def test_dataarray_entry_raises(self) -> None: - with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): - self._parse([DataArray([0, 1, 2], dims=["x"])]) - - def test_unknown_type_entry_raises(self) -> None: - class Foo: ... - - with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): - self._parse([Foo()]) def test_best_int() -> None: @@ -1154,49 +257,6 @@ def test_get_dims_with_index_levels() -> None: assert get_dims_with_index_levels(ds5) == [] -def test_align(x: Variable, u: Variable) -> None: # noqa: F811 - alpha = xr.DataArray([1, 2], [[1, 2]]) - beta = xr.DataArray( - [1, 2, 3], - [ - ( - "dim_3", - pd.MultiIndex.from_tuples( - [(1, "b"), (2, "b"), (1, "c")], names=["level1", "level2"] - ), - ) - ], - ) - - # inner join - x_obs, alpha_obs = align(x, alpha) - assert isinstance(x_obs, Variable) - assert x_obs.shape == alpha_obs.shape == (1,) - assert_varequal(x_obs, x.loc[[1]]) - - # left-join - x_obs, alpha_obs = align(x, alpha, join="left") - assert x_obs.shape == alpha_obs.shape == (2,) - assert isinstance(x_obs, Variable) - assert_varequal(x_obs, x) - assert_equal(alpha_obs, DataArray([np.nan, 1], [[0, 1]])) - - # multiindex - beta_obs, u_obs = align(beta, u) - assert u_obs.shape == beta_obs.shape == (2,) - assert isinstance(u_obs, Variable) - assert_varequal(u_obs, u.loc[[(1, "b"), (2, "b")]]) - assert_equal(beta_obs, beta.loc[[(1, "b"), (2, "b")]]) - - # with linear expression - expr = 20 * x - x_obs, expr_obs, alpha_obs = align(x, expr, alpha) - assert x_obs.shape == alpha_obs.shape == (1,) - assert expr_obs.shape == (1, 1) # _term dim - assert isinstance(expr_obs, LinearExpression) - assert_linequal(expr_obs, expr.loc[[1]]) - - def test_is_constant() -> None: model = Model() index = pd.Index(range(10), name="t") From 219baccf89236920e75cf9f59f80dc6803061a34 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 3 Jun 2026 
10:28:28 +0200 Subject: [PATCH 3/5] test: close coverage gaps in alignment.py and the MI coords serialization alignment.py: 97% -> 99%. New edge-case tests: bare-string dims, 0-d arrays, fill_missing_coords type check, partially-named MI levels, gap detection with extra dims, gap-error truncation (>5 missing combinations). The two remaining uncovered lines are defensive branches for inputs outside the DimsLike contract (non-iterable dims). common.py: 88% -> 90%. The MultiIndex round-trip through coords_to_dataset_vars / coords_from_dataset (used by CSRConstraint) had zero coverage; now pinned. Co-Authored-By: Claude Opus 4.8 (1M context) --- test/test_alignment.py | 72 ++++++++++++++++++++++++++++++++++++++++++ test/test_common.py | 21 ++++++++++++ 2 files changed, 93 insertions(+) diff --git a/test/test_alignment.py b/test/test_alignment.py index e8c778ea..c75ca807 100644 --- a/test/test_alignment.py +++ b/test/test_alignment.py @@ -33,6 +33,7 @@ align, as_dataarray, broadcast_to_coords, + fill_missing_coords, validate_alignment, ) from linopy.testing import assert_linequal, assert_varequal @@ -194,6 +195,11 @@ def test_polars_series(self) -> None: assert da.dims == (target_dim,) assert list(da.coords[target_dim].values) == target_index + def test_series_dims_as_bare_string(self) -> None: + """Dims may be a single dim name instead of a list.""" + da = as_dataarray(pd.Series([1, 2, 3]), dims="x") + assert da.dims == ("x",) + class TestAsDataarrayFromNumpy: """ndarray conversion: positional labeling from coords / dims.""" @@ -276,6 +282,19 @@ def test_extra_coord_entries_are_dropped(self) -> None: assert list(da.coords["dim_0"].values) == ["a", "b"] assert "dim_2" not in da.coords + def test_dims_as_bare_string(self) -> None: + """Dims may be a single dim name; dict coords are filtered to those dims.""" + da = as_dataarray(np.array([1, 2]), coords={"x": [0, 1], "drop": [9]}, dims="x") + assert da.dims == ("x",) + assert list(da.coords["x"].values) == [0, 1] + assert 
"drop" not in da.coords + + def test_zero_dim_array_expands_over_dict_coords(self) -> None: + """A 0-d array converts like a scalar, expanding over dict coords.""" + da = as_dataarray(np.array(5.0), coords={"a": [0, 1]}) + assert da.dims == ("a",) + assert da.values.tolist() == [5.0, 5.0] + class TestAsDataarrayFromScalar: """Scalar conversion: numbers expand over coords when given.""" @@ -336,6 +355,12 @@ def test_unsupported_type_raises(self) -> None: with pytest.raises(TypeError): as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) + def test_fill_missing_coords_rejects_non_xarray(self) -> None: + with pytest.raises( + TypeError, match="Expected xarray.DataArray or xarray.Dataset" + ): + fill_missing_coords([1, 2, 3]) # type: ignore[call-overload] + def test_does_not_expand_missing_coord_dims(self) -> None: """as_dataarray converts; only broadcast_to_coords expands missing dims.""" coords = {"a": [0, 1], "b": [10, 20]} @@ -720,6 +745,53 @@ def test_unrelated_mi_series_still_unstacks(self) -> None: assert set(da.dims) == {"time", "foo", "bar"} + def test_partially_named_mi_levels(self) -> None: + """A None level name in the MultiIndex is skipped during projection.""" + mi = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", None)) + mi.name = "dim_3" + by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"]) + + with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): + da = broadcast_to_coords(by_level1, coords={"dim_3": mi}, strict=False) + + assert da.dims == ("dim_3",) + assert da.values.tolist() == [10.0, 10.0, 20.0, 20.0] + + def test_gap_detection_with_extra_dims(self, mi_coords: xr.Coordinates) -> None: + """Gaps are detected per level combination even when the input has extra dims.""" + arr = DataArray( + [[[1.0, np.nan], [2.0, 2.0]], [[3.0, 3.0], [4.0, 4.0]]], + dims=["level1", "level2", "extra"], + coords={"level1": [1, 2], "level2": ["a", "b"], "extra": [0, 1]}, + ) + + with pytest.warns( + 
EvolvingAPIWarning, match=r"filling uncovered level combinations" + ): + da = broadcast_to_coords( + arr, coords=mi_coords, dims=["dim_3"], strict=False + ) + + assert set(da.dims) == {"dim_3", "extra"} + + def test_strict_gap_error_truncates_long_missing_list(self) -> None: + """More than 5 missing combinations are truncated in the error message.""" + idx = pd.MultiIndex.from_product( + [[1, 2, 3], ["a", "b", "c"]], names=("l1", "l2") + ) + idx.name = "dim_m" + coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_m") + # Diagonal subset: every level value present, 6 of 9 combinations missing. + diagonal = pd.MultiIndex.from_tuples( + [(1, "a"), (2, "b"), (3, "c")], names=["l1", "l2"] + ) + weights = pd.Series([1.0, 2.0, 3.0], index=diagonal) + + with pytest.raises( + ValueError, match=r"no value for 6 level combination.*in total" + ): + broadcast_to_coords(weights, coords, dims=["dim_m"], label="lower bound") + # --- strict-mode policy on MI projections (deprecation / gaps) --- def test_strict_partial_level_warns( diff --git a/test/test_common.py b/test/test_common.py index c2f78996..c56b17b1 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -15,6 +15,8 @@ from linopy.common import ( assign_multiindex_safe, best_int, + coords_from_dataset, + coords_to_dataset_vars, get_dims_with_index_levels, is_constant, iterate_slices, @@ -72,6 +74,25 @@ def test_assign_multiindex_safe() -> None: assert result["pressure"].equals(data) +def test_coords_dataset_vars_roundtrip_multiindex() -> None: + """MultiIndex and plain coords survive serialization to Dataset vars and back.""" + mi = pd.MultiIndex.from_product( + [[2020, 2030], ["t1", "t2"]], names=("period", "timestep") + ) + mi.name = "snapshot" + plain = pd.Index([1, 2, 3], name="simple") + + ds = xr.Dataset(coords_to_dataset_vars([mi, plain])) + restored = coords_from_dataset(ds, ["snapshot", "simple"]) + + assert isinstance(restored[0], pd.MultiIndex) + assert restored[0].equals(mi) + assert 
list(restored[0].names) == ["period", "timestep"] + assert restored[0].name == "snapshot" + assert restored[1].equals(plain) + assert restored[1].name == "simple" + + def test_iterate_slices_basic() -> None: ds = xr.Dataset( {"var": (("x", "y"), np.random.rand(10, 10))}, # noqa: NPY002 From d31d091929b87aae67c37648b5ba7505f63b43d1 Mon Sep 17 00:00:00 2001 From: Felix <117816358+FBumann@users.noreply.github.com> Date: Wed, 3 Jun 2026 11:36:41 +0200 Subject: [PATCH 4/5] Update test/test_alignment.py Co-authored-by: Fabian Hofmann --- test/test_alignment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_alignment.py b/test/test_alignment.py index c75ca807..c40e6f66 100644 --- a/test/test_alignment.py +++ b/test/test_alignment.py @@ -606,7 +606,7 @@ def test_extra_coord_entries_broadcast_in(self) -> None: # --------------------------------------------------------------------------- -class TestMultiIndexProjection: +class TestBroadcastToCoordsMultiIndexProjection: """ Inputs indexed by levels of a stacked MultiIndex dim are projected onto it. From b3db1a808a04d97e84ecd2e2944abe91fc776589 Mon Sep 17 00:00:00 2001 From: Felix <117816358+FBumann@users.noreply.github.com> Date: Wed, 3 Jun 2026 11:36:54 +0200 Subject: [PATCH 5/5] Update test/test_alignment.py Co-authored-by: Fabian Hofmann --- test/test_alignment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_alignment.py b/test/test_alignment.py index c40e6f66..5cbee415 100644 --- a/test/test_alignment.py +++ b/test/test_alignment.py @@ -850,7 +850,7 @@ def test_strict_rejects_unnamed_mi_mismatch(self) -> None: # --------------------------------------------------------------------------- -class TestStrictMode: +class TestBroadcastToCoordsStrictMode: """strict=True: anything broadcasting can't resolve raises, naming label.""" def test_extra_dims_pass_loose_fail_strict(self) -> None: