diff --git a/doc/release_notes.rst b/doc/release_notes.rst index b2ff60de..29503064 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -56,7 +56,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** -* ``add_variables`` / ``add_constraints``: extends 0.7.0's coords-as-truth rule to ``lower``, ``upper`` and ``mask`` for every bound type and dim order. Pandas ``Series`` / ``DataFrame`` bounds or masks missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__); the variable's dimension order always follows ``coords`` (`#706 `__); bare-tuple coord entries (``coords=[(0, 1, 2)]``) now behave like lists. Mismatched values or extra dims raise ``ValueError`` with a labelled message; sparse-coord masks (formerly a v0.6.3 ``FutureWarning``, #580) raise ``ValueError``, and masks with dims not in the data raise ``ValueError`` instead of ``AssertionError``. +* ``add_variables`` / ``add_constraints``: extends 0.7.0's coords-as-truth rule to ``lower``, ``upper`` and ``mask`` for every bound type and dim order. Pandas ``Series`` / ``DataFrame`` bounds or masks missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__); the variable's dimension order always follows ``coords`` (`#706 `__); tuple coord entries follow xarray's ``(dim_name, values)`` convention (e.g. ``coords=[("origin", origins)]``); a bare sequence of coordinate values must be passed as a ``list`` — a bare tuple such as ``coords=[(0, 1, 2)]`` now raises ``TypeError``. Mismatched values or extra dims raise ``ValueError`` with a labelled message; sparse-coord masks (formerly a v0.6.3 ``FutureWarning``, #580) raise ``ValueError``, and masks with dims not in the data raise ``ValueError`` instead of ``AssertionError``. * Pandas inputs whose index names *levels* of a stacked-``MultiIndex`` ``coords`` dimension are now projected onto that dimension: a level subset broadcasts across the others, the full set aligns element-wise. This fixes PyPSA multi-investment arithmetic (e.g. 
an expression over a ``(period, timestep)`` ``snapshot`` MultiIndex times a ``period``-indexed weighting). In ``add_variables`` / ``add_constraints`` the input must provide a value for every level combination of the MultiIndex or a ``ValueError`` is raised (the error lists the missing combinations). **Implicit level projections are deprecated**: they emit an ``EvolvingAPIWarning`` everywhere — in arithmetic *and* in ``add_variables`` / ``add_constraints`` — and will raise under the upcoming v1 convention. Project the input onto the dimension explicitly (select with the dimension's level values) to keep current behavior. Aligning the full level set with full coverage stays silent. Strict validation also rejects a ``MultiIndex`` input with *unnamed* levels whose combinations don't match ``coords`` (previously a silent bypass, as such inputs can't be projected by level name). * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. diff --git a/linopy/alignment.py b/linopy/alignment.py index d5f65172..e7125dae 100644 --- a/linopy/alignment.py +++ b/linopy/alignment.py @@ -65,7 +65,9 @@ def _coords_to_dict( Sequence-entry rules (``i`` is the position in ``coords``, ``dims[i]`` is the matching entry in ``dims`` when one exists). An entry is *unlabeled* if it's an unnamed ``pd.Index`` or a bare ``list`` / - ``tuple`` / ``range`` / ``ndarray``. + ``range`` / ``ndarray``. 
A ``tuple`` is **not** unlabeled: following + xarray, it is read as ``(dim_name, values[, attrs])`` — the first + element names the dimension. +---------------------------------+-----------------------+-----------+ | Entry | Naming source | Outcome | @@ -80,6 +82,12 @@ def _coords_to_dict( | | | ``dim_0`` | | | | etc. | +---------------------------------+-----------------------+-----------+ + | ``(name, values)`` tuple | ``name`` (1st elem) | accepted | + | | | (xarray | + | | | form) | + +---------------------------------+-----------------------+-----------+ + | tuple of length < 2 | — | TypeError | + +---------------------------------+-----------------------+-----------+ | ``pd.MultiIndex`` with ``.name``| ``.name`` | accepted | +---------------------------------+-----------------------+-----------+ | ``pd.MultiIndex`` w/o ``.name`` | ``dims[i]`` | accepted | @@ -124,16 +132,36 @@ def _coords_to_dict( else (dim_names[i] if dim_names and i < len(dim_names) else None) ) if name is not None: - result[name] = c - elif isinstance(c, list | tuple | range | np.ndarray): + result[name] = c if c.name == name else c.rename(name) + elif isinstance(c, tuple): + if ( + len(c) < 2 + or not isinstance(c[0], Hashable) + or isinstance(c[0], list | tuple | np.ndarray) + ): + raise TypeError( + f"tuple coords entries follow xarray's (dim_name, values) " + f"convention; got {c!r}. Pass a list for a bare sequence " + f"of coordinate values." + ) + name, values = c[0], c[1] + try: + result[name] = pd.Index(values, name=name) + except TypeError as err: + raise TypeError( + f"tuple coords entries follow xarray's (dim_name, values) " + f"convention with array-like values; got {c!r}. Pass a " + f"list for a bare sequence of coordinate values." 
+ ) from err + elif isinstance(c, list | range | np.ndarray): if dim_names and i < len(dim_names): result[dim_names[i]] = pd.Index(c, name=dim_names[i]) else: raise TypeError( - f"coords entries must be pd.Index or an unnamed sequence " - f"(list / tuple / range / numpy.ndarray); got " - f"{type(c).__name__}. For an xarray DataArray coord, pass " - f"`variable.indexes[]` (a pd.Index) instead." + f"coords entries must be pd.Index, an unlabeled sequence " + f"(list / range / numpy.ndarray), or a (dim_name, values) " + f"tuple; got {type(c).__name__}. For an xarray DataArray " + f"coord, pass `variable.indexes[]` (a pd.Index) instead." ) return result @@ -527,11 +555,6 @@ def _broadcast_to_coords( if coords is None: return as_dataarray(arr, coords, dims, **kwargs), [] - if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords): - # xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a - # coords entry passed as a tuple behaves identically to a list. - coords = [list(c) if isinstance(c, tuple) else c for c in coords] - expected = _coords_to_dict(coords, dims=dims) if not expected: return as_dataarray(arr, coords, dims, **kwargs), [] diff --git a/test/test_alignment.py b/test/test_alignment.py index 5cbee415..73d2cfdb 100644 --- a/test/test_alignment.py +++ b/test/test_alignment.py @@ -7,6 +7,7 @@ - ``TestAsDataarrayFrom*`` — :func:`as_dataarray` (convert only) - ``TestCoordsToDict`` — the coords-entry naming rules +- ``TestAddVariablesCoords`` — coords/dims → variable dims (end-to-end) - ``TestBroadcastToCoords`` — ``broadcast_to_coords(strict=False)`` - ``TestMultiIndexProjection`` — implicit MI-level projection (values, deprecation warnings, coverage gaps) — the legacy/v1 fork point @@ -152,39 +153,34 @@ def test_dataframe_dim_naming( assert list(da.coords[expected_dims[0]].values) == list(df.index) assert list(da.coords[expected_dims[1]].values) == list(df.columns) - def test_series_aligned_coords(self) -> None: - """This should 
not give out a warning even though coords are given.""" - target_dim = "dim_0" - target_index = ["a", "b", "c"] - s = pd.Series([1, 2, 3], index=target_index) - da = as_dataarray(s, coords=[target_index]) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - da = as_dataarray(s, coords={target_dim: target_index}) - assert isinstance(da, DataArray) - assert da.dims == (target_dim,) - assert list(da.coords[target_dim].values) == target_index - - def test_dataframe_aligned_coords(self) -> None: - """This should not give out a warning even though coords are given.""" - target_dims = ("dim_0", "dim_1") - target_index = ["a", "b"] - target_columns = ["A", "B"] - df = pd.DataFrame([[1, 2], [3, 4]], index=target_index, columns=target_columns) - da = as_dataarray(df, coords=[target_index, target_columns]) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns + @pytest.mark.parametrize( + "coords", + [[["a", "b", "c"]], {"dim_0": ["a", "b", "c"]}], + ids=["list", "dict"], + ) + def test_series_aligned_coords_do_not_warn(self, coords: Any) -> None: + """Coords matching the pandas index are accepted silently — no misalignment warning.""" + s = pd.Series([1, 2, 3], index=["a", "b", "c"]) + with warnings.catch_warnings(): + warnings.simplefilter("error") + da = as_dataarray(s, coords=coords) + assert da.dims == ("dim_0",) + assert list(da.coords["dim_0"].values) == ["a", "b", "c"] - coords = dict(zip(target_dims, [target_index, target_columns])) - da = as_dataarray(df, coords=coords) - assert isinstance(da, DataArray) - assert da.dims == target_dims - assert list(da.coords[target_dims[0]].values) == target_index - assert list(da.coords[target_dims[1]].values) == target_columns + @pytest.mark.parametrize( + "coords", + [[["a", "b"], ["A", "B"]], {"dim_0": ["a", 
"b"], "dim_1": ["A", "B"]}], + ids=["list", "dict"], + ) + def test_dataframe_aligned_coords_do_not_warn(self, coords: Any) -> None: + """Coords matching the frame's index/columns are accepted silently.""" + df = pd.DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) + with warnings.catch_warnings(): + warnings.simplefilter("error") + da = as_dataarray(df, coords=coords) + assert da.dims == ("dim_0", "dim_1") + assert list(da.coords["dim_0"].values) == ["a", "b"] + assert list(da.coords["dim_1"].values) == ["A", "B"] def test_polars_series(self) -> None: target_dim = "dim_0" @@ -445,6 +441,18 @@ def test_explicit_dims_win_over_inference(self) -> None: assert set(da.coords.keys()) == {"station", "letter", "num"} +def _ij_multiindex() -> pd.MultiIndex: + """Unnamed (i, j) MultiIndex used across the coords-entry tests.""" + return pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + + +def _named_multiindex(name: str = "multi") -> pd.MultiIndex: + """:func:`_ij_multiindex` carrying an overall index name.""" + mi = _ij_multiindex() + mi.name = name + return mi + + # --------------------------------------------------------------------------- # _coords_to_dict — the coords-entry naming rules # --------------------------------------------------------------------------- @@ -452,112 +460,153 @@ def test_explicit_dims_win_over_inference(self) -> None: class TestCoordsToDict: """ - One test per row of the ``_coords_to_dict`` rules table. - - Each test name states the rule it pins; the assertions show the - expected outcome. Together they form the executable spec of how - sequence-form ``coords`` entries are named. + Executable spec of ``_coords_to_dict``: how each coords-entry form is + named or rejected, parameterized by entry form. The end-to-end dim + assignment these feed lives in :class:`TestAddVariablesCoords`. 
""" @staticmethod def _parse(coords: Any, dims: Any = None) -> dict: - return _coords_to_dict(coords, dims=dims) - # -- container forms --------------------------------------------------- + @pytest.mark.parametrize( + "coords, dims", + [ + ([("x", [0, 1, 2])], None), + ([pd.Index([0, 1, 2], name="x")], None), + ([pd.Index([0, 1, 2])], ["x"]), + ([[0, 1, 2]], ["x"]), + ([range(3)], ["x"]), + ([np.array([0, 1, 2])], ["x"]), + ], + ids=[ + "tuple", + "named-index", + "unnamed-index+dims", + "list+dims", + "range+dims", + "ndarray+dims", + ], + ) + def test_named_form_parses_to_x(self, coords: Any, dims: Any) -> None: + """Each naming form parses to {"x": [0, 1, 2]} (tuple = xarray form).""" + result = self._parse(coords, dims=dims) + assert set(result) == {"x"} + assert list(result["x"]) == [0, 1, 2] + assert result["x"].name == "x" + + @pytest.mark.parametrize( + "coords, expected", + [ + ( + xr.Coordinates.from_pandas_multiindex(_ij_multiindex(), "stacked"), + {"stacked"}, + ), + ([_named_multiindex()], {"multi"}), + ([("x", [0, 1, 2], {"units": "m"})], {"x"}), + ], + ids=["xarray-coordinates", "named-multiindex", "tuple-with-attrs"], + ) + def test_other_forms_parse_to_expected_names( + self, coords: Any, expected: set + ) -> None: + assert set(self._parse(coords)) == expected - def test_mapping_is_returned_as_shallow_dict_copy(self) -> None: + def test_mapping_returns_shallow_copy(self) -> None: src = {"x": [0, 1, 2], "y": [10, 20]} result = self._parse(src) assert result == src assert result is not src - def test_xarray_coordinates_keeps_only_dim_entries(self) -> None: - midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - coords = xr.Coordinates.from_pandas_multiindex(midx, "stacked") - result = self._parse(coords) - assert set(result) == {"stacked"} - - # -- pd.Index entries -------------------------------------------------- - - def test_named_pd_index_uses_its_name(self) -> None: - result = self._parse([pd.Index([0, 1, 2], name="x")]) - 
assert set(result) == {"x"} - - def test_unnamed_pd_index_with_dims_uses_dims(self) -> None: - result = self._parse([pd.Index([0, 1, 2])], dims=["x"]) - assert set(result) == {"x"} - - def test_unnamed_pd_index_without_dims_is_size_only(self) -> None: - # Same as a bare sequence: contributes no dim name; xarray assigns - # ``dim_0`` downstream. - assert self._parse([pd.Index([0, 1, 2])]) == {} - m = Model() - v = m.add_variables(coords=[pd.Index([0, 1, 2])]) - assert v.dims == ("dim_0",) - - # -- pd.MultiIndex entries -------------------------------------------- - - def test_named_multiindex_uses_its_name(self) -> None: - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - mi.name = "multi" - result = self._parse([mi]) - assert set(result) == {"multi"} - - def test_unnamed_multiindex_with_dims_uses_dims(self) -> None: - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - result = self._parse([mi], dims=["multi"]) - assert set(result) == {"multi"} - assert result["multi"].name == "multi" - assert mi.name is None # caller's MultiIndex not mutated - - def test_unnamed_multiindex_without_dims_raises(self) -> None: - mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) - with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): - self._parse([mi]) - - # -- bare sequence entries -------------------------------------------- - @pytest.mark.parametrize( - "entry", - [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], - ids=["list", "tuple", "range", "ndarray"], + "entry", [pd.Index([0, 1, 2]), _ij_multiindex()], ids=["index", "multiindex"] ) - def test_bare_sequence_with_dims_uses_dims(self, entry: Any) -> None: + def test_unnamed_index_named_from_dims_on_a_copy(self, entry: Any) -> None: result = self._parse([entry], dims=["x"]) - assert set(result) == {"x"} + assert result["x"].name == "x" + assert entry.name is None # caller not mutated @pytest.mark.parametrize( "entry", - [[0, 1, 2], (0, 
1, 2), range(3), np.array([0, 1, 2])], - ids=["list", "tuple", "range", "ndarray"], + [[0, 1, 2], range(3), np.array([0, 1, 2]), pd.Index([0, 1, 2])], + ids=["list", "range", "ndarray", "unnamed-index"], ) - def test_bare_sequence_without_dims_is_silently_skipped(self, entry: Any) -> None: + def test_unlabeled_without_dims_is_skipped(self, entry: Any) -> None: assert self._parse([entry]) == {} @pytest.mark.parametrize( - "entry", - [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], - ids=["list", "tuple", "range", "ndarray"], + "coords, dims, match", + [ + ([_ij_multiindex()], None, r"MultiIndex.*must have \.name set"), + ([("x",)], None, r"\(dim_name, values\) convention"), + ([(0, 1, 2)], ["x"], r"\(dim_name, values\) convention"), + ([("x", 5)], None, r"with array-like values"), + ( + [DataArray([0, 1, 2], dims=["x"])], + None, + r"coords entries must be pd\.Index", + ), + ([object()], None, r"coords entries must be pd\.Index"), + ], + ids=[ + "unnamed-multiindex", + "tuple-too-short", + "tuple-bare-values", + "tuple-scalar-values", + "dataarray", + "unknown-type", + ], ) - def test_bare_sequence_without_dims_falls_through_to_xarray_dim_0( - self, entry: Any + def test_invalid_entry_raises_typeerror( + self, coords: Any, dims: Any, match: str ) -> None: - m = Model() - v = m.add_variables(coords=[entry]) - assert v.dims == ("dim_0",) + with pytest.raises(TypeError, match=match): + self._parse(coords, dims=dims) - # -- unsupported entries ---------------------------------------------- - def test_dataarray_entry_raises(self) -> None: - with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): - self._parse([DataArray([0, 1, 2], dims=["x"])]) +# --------------------------------------------------------------------------- +# add_variables — coords / dims map to the variable's dimensions +# --------------------------------------------------------------------------- - def test_unknown_type_entry_raises(self) -> None: - class Foo: ... 
- with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): - self._parse([Foo()]) +class TestAddVariablesCoords: + """End-to-end: each coords / dims form sets the variable's dimensions.""" + + @pytest.mark.parametrize( + "coords, dims, expected_dims", + [ + ([("x", [0, 1, 2])], None, ("x",)), + ([pd.Index([0, 1, 2], name="x")], None, ("x",)), + ([pd.Index([0, 1, 2])], ["x"], ("x",)), + ([[0, 1, 2]], ["x"], ("x",)), + ([range(3)], ["x"], ("x",)), + ([np.array([0, 1, 2])], ["x"], ("x",)), + ([[0, 1, 2]], None, ("dim_0",)), + ([range(3)], None, ("dim_0",)), + ([np.array([0, 1, 2])], None, ("dim_0",)), + ([pd.Index([0, 1, 2])], None, ("dim_0",)), + ([("origin", ["a", "b"]), ("dest", ["x", "y"])], None, ("origin", "dest")), + ], + ids=[ + "tuple", + "named-index", + "unnamed-index+dims", + "list+dims", + "range+dims", + "ndarray+dims", + "list", + "range", + "ndarray", + "unnamed-index", + "multiple-tuples", + ], + ) + def test_coords_set_variable_dims( + self, coords: Any, dims: Any, expected_dims: tuple + ) -> None: + m = Model() + v = m.add_variables(lower=0, coords=coords, dims=dims) + assert v.dims == expected_dims # --------------------------------------------------------------------------- @@ -939,8 +988,29 @@ def test_label_in_error(self) -> None: class TestAlign: """align() conforms multiple linopy / xarray objects to common coords.""" - def test_align(self, x: Variable, u: Variable) -> None: + def test_inner_join_intersects_coords(self, x: Variable) -> None: + """Default join keeps only the shared coords (x over [0, 1] ∩ alpha over [1, 2]).""" + alpha = xr.DataArray([1, 2], [[1, 2]]) + + x_obs, alpha_obs = align(x, alpha) + + assert isinstance(x_obs, Variable) + assert x_obs.shape == alpha_obs.shape == (1,) + assert_varequal(x_obs, x.loc[[1]]) + + def test_left_join_keeps_left_coords_and_fills(self, x: Variable) -> None: + """join='left' keeps x's coords; the right operand is reindexed with NaN.""" alpha = xr.DataArray([1, 2], [[1, 2]]) + + 
x_obs, alpha_obs = align(x, alpha, join="left") + + assert isinstance(x_obs, Variable) + assert x_obs.shape == alpha_obs.shape == (2,) + assert_varequal(x_obs, x) + assert_equal(alpha_obs, DataArray([np.nan, 1], [[0, 1]])) + + def test_inner_join_over_multiindex(self, u: Variable) -> None: + """Inner join intersects MultiIndex coords element-wise across the stacked dim.""" beta = xr.DataArray( [1, 2, 3], [ @@ -953,30 +1023,21 @@ def test_align(self, x: Variable, u: Variable) -> None: ], ) - # inner join - x_obs, alpha_obs = align(x, alpha) - assert isinstance(x_obs, Variable) - assert x_obs.shape == alpha_obs.shape == (1,) - assert_varequal(x_obs, x.loc[[1]]) - - # left-join - x_obs, alpha_obs = align(x, alpha, join="left") - assert x_obs.shape == alpha_obs.shape == (2,) - assert isinstance(x_obs, Variable) - assert_varequal(x_obs, x) - assert_equal(alpha_obs, DataArray([np.nan, 1], [[0, 1]])) - - # multiindex beta_obs, u_obs = align(beta, u) - assert u_obs.shape == beta_obs.shape == (2,) + assert isinstance(u_obs, Variable) + assert u_obs.shape == beta_obs.shape == (2,) assert_varequal(u_obs, u.loc[[(1, "b"), (2, "b")]]) assert_equal(beta_obs, beta.loc[[(1, "b"), (2, "b")]]) - # with linear expression + def test_aligns_linear_expression(self, x: Variable) -> None: + """A LinearExpression aligns alongside variables, keeping its _term dim.""" + alpha = xr.DataArray([1, 2], [[1, 2]]) expr = 20 * x + x_obs, expr_obs, alpha_obs = align(x, expr, alpha) - assert x_obs.shape == alpha_obs.shape == (1,) - assert expr_obs.shape == (1, 1) # _term dim + assert isinstance(expr_obs, LinearExpression) + assert x_obs.shape == alpha_obs.shape == (1,) + assert expr_obs.shape == (1, 1) # the trailing 1 is the _term dim assert_linequal(expr_obs, expr.loc[[1]])