From bf1287709c1cffa73b1fdb7b2ca40d38ee42a8fd Mon Sep 17 00:00:00 2001 From: Fabian Date: Fri, 13 Mar 2026 14:43:57 +0100 Subject: [PATCH 01/27] Add aggregate_time support to convert_and_aggregate --- RELEASE_NOTES.rst | 4 + atlite/convert.py | 101 ++++++++++++++------ test/test_aggregate_time.py | 181 ++++++++++++++++++++++++++++++++++++ 3 files changed, 260 insertions(+), 26 deletions(-) create mode 100644 test/test_aggregate_time.py diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index a14bc44c..ffb4a504 100755 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -18,6 +18,10 @@ Upcoming Release ``pip install git+https://github.com/pypsa/atlite``. +* Add ``aggregate_time={"sum", "mean", False}`` to ``convert_and_aggregate`` for temporal + aggregation with and without spatial aggregation, and deprecate ``capacity_factor``/``capacity_factor_timeseries`` + in favor of it + `v0.5.0 `__ (13th March 2026) ======================================================================================= diff --git a/atlite/convert.py b/atlite/convert.py index 98fa5a06..0a1a89cc 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -9,10 +9,11 @@ import datetime as dt import logging +import warnings from collections import namedtuple from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import geopandas as gpd import numpy as np @@ -43,8 +44,6 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from typing import Literal - from atlite.resource import TurbineConfig @@ -58,6 +57,7 @@ def convert_and_aggregate( shapes_crs=4326, per_unit=False, return_capacity=False, + aggregate_time: Literal["sum", "mean", False] | None = None, capacity_factor=False, capacity_factor_timeseries=False, show_progress=False, @@ -93,12 +93,17 @@ def convert_and_aggregate( return_capacity : boolean Additionally returns the installed capacity at each bus corresponding to ``layout`` (defaults to False). 
+ aggregate_time : "sum", "mean", False, or None + Controls temporal aggregation of results. ``"sum"`` sums over time, + ``"mean"`` averages over time, ``False`` returns full timeseries. + ``None`` keeps the historical default behavior: time-summed results + without spatial aggregation and full timeseries with spatial + aggregation. Replaces the deprecated ``capacity_factor`` and + ``capacity_factor_timeseries`` parameters. capacity_factor : boolean - If True, the static capacity factor of the chosen resource for each - grid cell is computed. + Deprecated. Use ``aggregate_time="mean"`` instead. capacity_factor_timeseries : boolean - If True, the capacity factor time series of the chosen resource for - each grid cell is computed. + Deprecated. Use ``aggregate_time=False`` instead (the default only with spatial aggregation). show_progress : boolean, default False Whether to show a progress bar. dask_kwargs : dict, default {} @@ -116,17 +121,21 @@ def convert_and_aggregate( **With aggregation** (``matrix``, ``shapes``, or ``layout`` given): Time-series of renewable generation aggregated to buses, with - dimensions ``(bus, time)``. + dimensions ``(bus, time)``. If ``aggregate_time`` is ``"sum"`` or + ``"mean"``, the time dimension is reduced accordingly. **Without aggregation** (none of the above given): - - ``capacity_factor_timeseries=True``: per-cell capacity factor - time series with dimensions ``(time, y, x)`` in p.u. Individual - locations can be extracted with - ``result.sel(x=lon, y=lat, method="nearest")``. - - ``capacity_factor=True``: time-averaged capacity factor per cell - with dimensions ``(y, x)`` in p.u. - - Otherwise: total energy sum per cell with dimensions ``(y, x)``. + - ``aggregate_time=False``: per-cell timeseries ``(time, y, x)``. + - ``aggregate_time="mean"``: time-averaged per cell ``(y, x)``. + - ``aggregate_time="sum"``: time-summed per cell ``(y, x)``. + + Legacy behavior (deprecated): + + - ``capacity_factor_timeseries=True``: equivalent to + ``aggregate_time=False``. 
+ - ``capacity_factor=True``: equivalent to ``aggregate_time="mean"``. + - No flags: historical default behavior. units : xr.DataArray (optional) The installed units per bus in MW corresponding to ``layout`` @@ -138,6 +147,41 @@ def convert_and_aggregate( pv : Generate solar PV generation time-series. """ + if ( + aggregate_time is not None + and aggregate_time is not False + and aggregate_time + not in ( + "sum", + "mean", + ) + ): + raise ValueError( + f"aggregate_time must be 'sum', 'mean', False, or None, got {aggregate_time!r}" + ) + + if capacity_factor or capacity_factor_timeseries: + if aggregate_time is not None and aggregate_time is not False: + raise ValueError( + "Cannot use 'aggregate_time' together with deprecated " + "'capacity_factor' or 'capacity_factor_timeseries'." + ) + if capacity_factor: + warnings.warn( + "capacity_factor is deprecated. Use aggregate_time='mean' instead.", + FutureWarning, + stacklevel=2, + ) + aggregate_time = "mean" + if capacity_factor_timeseries: + warnings.warn( + "capacity_factor_timeseries is deprecated. " + "Use aggregate_time=False instead.", + FutureWarning, + stacklevel=2, + ) + aggregate_time = False + func_name = convert_func.__name__.replace("convert_", "") logger.info(f"Convert and aggregate '{func_name}'.") da = convert_func(cutout.data, **convert_kwds) @@ -150,16 +194,15 @@ def convert_and_aggregate( "One of `matrix`, `shapes` and `layout` must be " "given for `per_unit` or `return_capacity`" ) - if capacity_factor or capacity_factor_timeseries: - if capacity_factor_timeseries: - res = da.rename("capacity factor") - else: - res = da.mean("time").rename("capacity factor") - res.attrs["units"] = "p.u." 
- return maybe_progressbar(res, show_progress, **dask_kwargs) - else: + + effective_aggregate_time = "sum" if aggregate_time is None else aggregate_time + if effective_aggregate_time == "mean": + res = da.mean("time") + elif effective_aggregate_time == "sum": res = da.sum("time", keep_attrs=True) - return maybe_progressbar(res, show_progress, **dask_kwargs) + else: + res = da + return maybe_progressbar(res, show_progress, **dask_kwargs) if matrix is not None: if shapes is not None: @@ -216,6 +259,12 @@ def convert_and_aggregate( else: results.attrs["units"] = "MW" + effective_aggregate_time = False if aggregate_time is None else aggregate_time + if effective_aggregate_time == "mean": + results = results.mean("time") + elif effective_aggregate_time == "sum": + results = results.sum("time", keep_attrs=True) + if return_capacity: return maybe_progressbar(results, show_progress, **dask_kwargs), capacity else: @@ -666,7 +715,7 @@ def wind( Get per-cell capacity factor time series (no aggregation): >>> cf = cutout.wind(turbine="Vestas_V112_3MW", - ... capacity_factor_timeseries=True) + ... aggregate_time=False) >>> cf.dims ('time', 'y', 'x') >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") @@ -850,7 +899,7 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) Get per-cell capacity factor time series (no aggregation): >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... capacity_factor_timeseries=True) + ... 
aggregate_time=False) >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") References diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py new file mode 100644 index 00000000..8a3f4ee2 --- /dev/null +++ b/test/test_aggregate_time.py @@ -0,0 +1,181 @@ +# SPDX-FileCopyrightText: Contributors to atlite +# +# SPDX-License-Identifier: MIT + +import warnings + +import numpy as np +import pytest +import xarray as xr + +from atlite.convert import convert_and_aggregate + + +class MockCutout: + def __init__(self, data): + self.data = data + grid_coords = np.array([(x, y) for y in data.y.values for x in data.x.values]) + import pandas as pd + + self.grid = pd.DataFrame(grid_coords, columns=["x", "y"]) + + +def identity_convert(ds, **kwargs): + return ds["var"] + + +@pytest.fixture +def cutout(): + np.random.seed(42) + times = xr.date_range("2020-01-01", periods=24, freq="h") + data = xr.Dataset( + { + "var": xr.DataArray( + np.random.rand(24, 3, 4), + dims=["time", "y", "x"], + coords={ + "time": times, + "y": [50.0, 51.0, 52.0], + "x": [5.0, 6.0, 7.0, 8.0], + }, + ) + } + ) + return MockCutout(data) + + +class TestAggregateTimeNoSpatial: + def test_aggregate_time_false_returns_timeseries(self, cutout): + result = convert_and_aggregate(cutout, identity_convert, aggregate_time=False) + assert "time" in result.dims + + def test_aggregate_time_mean(self, cutout): + result = convert_and_aggregate(cutout, identity_convert, aggregate_time="mean") + assert "time" not in result.dims + expected = cutout.data["var"].mean("time") + np.testing.assert_allclose(result.values, expected.values) + + def test_aggregate_time_sum(self, cutout): + result = convert_and_aggregate(cutout, identity_convert, aggregate_time="sum") + assert "time" not in result.dims + expected = cutout.data["var"].sum("time") + np.testing.assert_allclose(result.values, expected.values) + + def test_default_no_spatial_aggregates_over_time(self, cutout): + result = convert_and_aggregate(cutout, 
identity_convert) + expected = cutout.data["var"].sum("time") + assert "time" not in result.dims + xr.testing.assert_identical(result, expected) + + +class TestAggregateTimeWithSpatial: + def test_aggregate_time_mean_with_layout(self, cutout): + layout = xr.DataArray( + np.ones((3, 4)), + dims=["y", "x"], + coords={"y": cutout.data.y, "x": cutout.data.x}, + ) + result_ts = convert_and_aggregate( + cutout, + identity_convert, + layout=layout, + aggregate_time=False, + ) + result_mean = convert_and_aggregate( + cutout, + identity_convert, + layout=layout, + aggregate_time="mean", + ) + assert "time" in result_ts.dims + assert "time" not in result_mean.dims + np.testing.assert_allclose(result_mean.values, result_ts.mean("time").values) + + def test_aggregate_time_sum_with_layout(self, cutout): + layout = xr.DataArray( + np.ones((3, 4)), + dims=["y", "x"], + coords={"y": cutout.data.y, "x": cutout.data.x}, + ) + result_ts = convert_and_aggregate( + cutout, + identity_convert, + layout=layout, + aggregate_time=False, + ) + result_sum = convert_and_aggregate( + cutout, + identity_convert, + layout=layout, + aggregate_time="sum", + ) + assert "time" not in result_sum.dims + np.testing.assert_allclose(result_sum.values, result_ts.sum("time").values) + + def test_aggregate_time_with_per_unit(self, cutout): + layout = xr.DataArray( + np.ones((3, 4)) * 2.0, + dims=["y", "x"], + coords={"y": cutout.data.y, "x": cutout.data.x}, + ) + result_pu = convert_and_aggregate( + cutout, + identity_convert, + layout=layout, + per_unit=True, + aggregate_time="mean", + ) + assert "time" not in result_pu.dims + + result_pu_ts = convert_and_aggregate( + cutout, + identity_convert, + layout=layout, + per_unit=True, + aggregate_time=False, + ) + np.testing.assert_allclose(result_pu.values, result_pu_ts.mean("time").values) + + +class TestDeprecatedParams: + def test_capacity_factor_warns(self, cutout): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result 
= convert_and_aggregate( + cutout, identity_convert, capacity_factor=True + ) + assert any( + "capacity_factor is deprecated" in str(warning.message) for warning in w + ) + assert "time" not in result.dims + + def test_capacity_factor_timeseries_warns(self, cutout): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = convert_and_aggregate( + cutout, identity_convert, capacity_factor_timeseries=True + ) + assert any( + "capacity_factor_timeseries is deprecated" in str(warning.message) + for warning in w + ) + assert "time" in result.dims + + def test_capacity_factor_with_aggregate_time_raises(self, cutout): + with pytest.raises(ValueError, match="Cannot use"): + convert_and_aggregate( + cutout, + identity_convert, + capacity_factor=True, + aggregate_time="mean", + ) + + +class TestInvalidArgs: + def test_invalid_aggregate_time_value(self, cutout): + with pytest.raises(ValueError, match="aggregate_time must be"): + convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") + + def test_aggregate_time_true_raises(self, cutout): + with pytest.raises(ValueError, match="aggregate_time must be"): + convert_and_aggregate(cutout, identity_convert, aggregate_time=True) From 5543e1aca61f07cec2f4f7e3ba2f769a29564f4c Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 08:29:59 +0100 Subject: [PATCH 02/27] add missing docstrings --- atlite/aggregate.py | 17 ++ atlite/convert.py | 326 +++++++++++++++++++++++++++++---- atlite/data.py | 52 +++++- atlite/datasets/cordex.py | 142 ++++++++++++++ atlite/datasets/era5.py | 262 ++++++++++++++++++-------- atlite/datasets/gebco.py | 17 ++ atlite/datasets/ncep.py | 303 ++++++++++++++++++++++++++++++ atlite/gis.py | 191 +++++++++++++------ atlite/hydro.py | 68 +++++++ atlite/pv/irradiation.py | 108 +++++++++++ atlite/pv/orientation.py | 96 +++++++++- atlite/pv/solar_panel_model.py | 17 ++ atlite/resource.py | 26 +++ 13 files changed, 1446 insertions(+), 179 deletions(-) diff 
--git a/atlite/aggregate.py b/atlite/aggregate.py index e3d3b3a4..3fc40585 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -10,6 +10,23 @@ def aggregate_matrix(da, matrix, index): + """ + Aggregate spatial data with a sparse matrix. + + Parameters + ---------- + da : xarray.DataArray + DataArray with spatial dimensions ``y`` and ``x``. + matrix : scipy.sparse.spmatrix + Aggregation matrix mapping flattened spatial cells to ``index``. + index : pandas.Index + Index defining the aggregated dimension. + + Returns + ------- + xarray.DataArray + Aggregated data indexed by ``index`` and, if present, time. + """ if index.name is None: index = index.rename("dim_0") if isinstance(da.data, dask.array.core.Array): diff --git a/atlite/convert.py b/atlite/convert.py index 0a1a89cc..e7a09163 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -273,7 +273,21 @@ def convert_and_aggregate( def maybe_progressbar(ds, show_progress, **kwargs): """ - Load a xr.dataset with dask arrays either with or without progressbar. + Load a dataset or data array, optionally showing a dask progress bar. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Object backed by dask arrays. + show_progress : bool + Whether to display a progress bar while loading. + **kwargs + Keyword arguments passed to ``load``. + + Returns + ------- + xr.Dataset or xr.DataArray + Loaded object. """ if show_progress: with ProgressBar(minimum=2): @@ -286,8 +300,17 @@ def maybe_progressbar(ds, show_progress, **kwargs): # temperature def convert_temperature(ds): """ - Return outside temperature (useful for e.g. heat pump T-dependent - coefficient of performance). + Convert ambient air temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + + Returns + ------- + xr.DataArray + Ambient temperature in degrees Celsius. 
""" # Temperature is in Kelvin return ds["temperature"] - 273.15 @@ -300,8 +323,17 @@ def temperature(cutout, **params): # soil temperature def convert_soil_temperature(ds): """ - Return soil temperature (useful for e.g. heat pump T-dependent coefficient - of performance). + Convert soil temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``soil temperature``. + + Returns + ------- + xr.DataArray + Soil temperature in degrees Celsius with missing values filled by zero. """ # Temperature is in Kelvin @@ -318,7 +350,17 @@ def soil_temperature(cutout, **params): # dewpoint temperature def convert_dewpoint_temperature(ds): """ - Return dewpoint temperature. + Convert dew point temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``dewpoint temperature``. + + Returns + ------- + xr.DataArray + Dew point temperature in degrees Celsius. """ # Temperature is in Kelvin return ds["dewpoint temperature"] - 273.15 @@ -331,6 +373,26 @@ def dewpoint_temperature(cutout, **params): def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): + """ + Convert source temperatures to heat pump COP values. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing the required temperature variables. + source : {"air", "soil"} + Heat source used for the heat pump. + sink_T : float + Sink temperature in degrees Celsius. + c0, c1, c2 : float or None + Quadratic regression coefficients. If ``None``, source-specific + defaults are used. + + Returns + ------- + xr.DataArray + Coefficient of performance for each time step and grid cell. + """ assert source in ["air", "soil"], NotImplementedError( "'source' must be one of ['air', 'soil']" ) @@ -398,6 +460,27 @@ def coefficient_of_performance( # heat demand def convert_heat_demand(ds, threshold, a, constant, hour_shift): + """ + Convert ambient temperature to daily heat demand by degree days. 
+ + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + threshold : float + Heating threshold temperature in degrees Celsius. + a : float + Linear scaling factor. + constant : float + Constant demand component added to the result. + hour_shift : float + Time shift in hours applied before daily averaging. + + Returns + ------- + xr.DataArray + Daily heat demand. + """ # Temperature is in Kelvin; take daily average T = ds["temperature"] T = T.assign_coords( @@ -468,6 +551,27 @@ def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **p # cooling demand def convert_cooling_demand(ds, threshold, a, constant, hour_shift): + """ + Convert ambient temperature to daily cooling demand by degree days. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + threshold : float + Cooling threshold temperature in degrees Celsius. + a : float + Linear scaling factor. + constant : float + Constant demand component added to the result. + hour_shift : float + Time shift in hours applied before daily averaging. + + Returns + ------- + xr.DataArray + Daily cooling demand. + """ # Temperature is in Kelvin; take daily average T = ds["temperature"] T = T.assign_coords( @@ -545,6 +649,29 @@ def cooling_demand( def convert_solar_thermal( ds, orientation, trigon_model, clearsky_model, c0, c1, t_store ): + """ + Convert weather data to solar thermal collector output. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and temperature variables. + orientation : callable + Surface orientation callback. + trigon_model : str + Trigonometric irradiation model. + clearsky_model : str or None + Clear-sky model used for diffuse irradiation. + c0, c1 : float + Collector efficiency parameters. + t_store : float + Storage temperature in degrees Celsius. + + Returns + ------- + xr.DataArray + Specific solar thermal output. 
+ """ # convert storage temperature to Kelvin in line with reanalysis data t_store += 273.15 @@ -632,7 +759,21 @@ def convert_wind( interpolation_method: Literal["logarithmic", "power"], ) -> xr.DataArray: """ - Convert wind speeds for turbine to wind energy generation. + Convert wind speeds to turbine-specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing wind speed data. + turbine : TurbineConfig + Turbine configuration with power curve and hub height. + interpolation_method : {"logarithmic", "power"} + Method used to extrapolate wind speed to hub height. + + Returns + ------- + xr.DataArray + Wind power output as specific yield per unit of installed capacity. """ V, POW, hub_height, P = itemgetter("V", "POW", "hub_height", "P")(turbine) @@ -748,6 +889,29 @@ def convert_irradiation( trigon_model="simple", clearsky_model="simple", ): + """ + Convert weather data to irradiation on a tilted surface. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and meteorological variables. + orientation : callable + Surface orientation callback. + tracking : {None, "horizontal", "tilted_horizontal", "vertical", "dual"}, optional + Tracking mode of the surface. + irradiation : {"total", "direct", "diffuse", "ground"}, default "total" + Irradiation component to return. + trigon_model : str, default "simple" + Trigonometric irradiation model. + clearsky_model : str or None, default "simple" + Clear-sky model used for diffuse irradiation. + + Returns + ------- + xr.DataArray + Tilted surface irradiation. + """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) irradiation = TiltedIrradiation( @@ -835,6 +999,29 @@ def irradiation( def convert_pv( ds, panel, orientation, tracking, trigon_model="simple", clearsky_model="simple" ): + """ + Convert weather data to photovoltaic specific generation. 
+ + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and temperature variables. + panel : dict + Solar panel configuration. + orientation : callable + Surface orientation callback. + tracking : {None, "horizontal", "tilted_horizontal", "vertical", "dual"} + Tracking mode of the panel. + trigon_model : str, default "simple" + Trigonometric irradiation model. + clearsky_model : str or None, default "simple" + Clear-sky model used for diffuse irradiation. + + Returns + ------- + xr.DataArray + PV power output as specific yield per unit of installed capacity. + """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) irradiation = TiltedIrradiation( @@ -933,6 +1120,21 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) # solar CSP def convert_csp(ds, installation): + """ + Convert direct solar radiation to CSP specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing direct radiation variables. + installation : dict + CSP installation configuration. + + Returns + ------- + xr.DataArray + CSP output as specific yield per unit of reference capacity. + """ solar_position = SolarPosition(ds) tech = installation["technology"] @@ -1021,6 +1223,21 @@ def csp(cutout, installation, technology=None, **params): # hydro def convert_runoff(ds, weight_with_height=True): + """ + Convert runoff data, optionally weighting by surface height. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``runoff`` and, if needed, ``height``. + weight_with_height : bool, default True + Whether to weight runoff by terrain height. + + Returns + ------- + xr.DataArray + Runoff field, optionally weighted by surface height. 
+ """ runoff = ds["runoff"] if weight_with_height: @@ -1036,6 +1253,40 @@ def runoff( normalize_using_yearly=None, **params, ): + """ + Compute aggregated surface runoff output with optional smoothing, + thresholding, and normalization. + + Parameters + ---------- + cutout : atlite.Cutout + Cutout providing weather data with runoff variables. + smooth : bool or int, optional + If True, apply a rolling mean with the default window of ``24 * 7`` + hours. If an integer, use it as the rolling window size. + lower_threshold_quantile : bool or float, optional + If True, use the default quantile ``5e-3``. If a float, set values + below that quantile to zero. + normalize_using_yearly : pd.Series, optional + Annual country totals used to scale ``countries``-indexed results over + overlapping full years. One factor per country is derived from the + summed reference values across the overlap. + **params + Additional keyword arguments passed to ``convert_and_aggregate()``, + including ``weight_with_height`` for the underlying runoff + conversion. + + Returns + ------- + xr.DataArray or tuple[xr.DataArray, xr.DataArray] + Runoff output from ``convert_and_aggregate``. Smoothing also supports + the tuple return form used with ``return_capacity=True``. Thresholding + and normalization are only supported for ``xr.DataArray`` results. + + See Also + -------- + convert_and_aggregate : General conversion/aggregation arguments. + """ result = cutout.convert_and_aggregate(convert_func=convert_runoff, **params) if smooth is not None: @@ -1148,45 +1399,31 @@ def convert_line_rating( ds, psi, R, D=0.028, Ts=373, epsilon=0.6, alpha=0.6, per_unit=False ): """ - Convert the cutout data to dynamic line rating time series. - - The formulation is based on: - - [1]“IEEE Std 738™-2012 (Revision of IEEE Std 738-2006/Incorporates IEEE Std - 738-2012/Cor 1-2013), IEEE Standard for Calculating the Current-Temperature - Relationship of Bare Overhead Conductors,” p. 72. 
- - The following simplifications/assumptions were made: - 1. Wind speed are taken at height 100 meters above ground. However, ironmen - and transmission lines are typically at 50-60 meters. - 2. Solar heat influx is set proportionally to solar short wave influx. - 3. The incidence angle of the solar heat influx is assumed to be 90 degree. - + Convert weather data to dynamic line rating time series. Parameters ---------- ds : xr.Dataset - Subset of the cutout data including all weather cells overlapping with - the line. - psi : int/float - Azimuth angle of the line in degree, that is the incidence angle of the line - with a pointer directing north (90 is east, 180 is south, 270 is west). + Dataset for the cells intersecting a line. + psi : float + Line azimuth in degrees clockwise from north. R : float - Resistance of the conductor in [Ω/m] at maximally allowed temperature Ts. - D : float, - Conductor diameter. - Ts : float - Maximally allowed surface temperature (typically 100°C). - epsilon : float + Conductor resistance in ohm per meter at temperature ``Ts``. + D : float, default 0.028 + Conductor diameter in meters. + Ts : float, default 373 + Maximum conductor surface temperature in Kelvin. + epsilon : float, default 0.6 Conductor emissivity. - alpha : float + alpha : float, default 0.6 Conductor absorptivity. + per_unit : bool, default False + Unused compatibility parameter. Returns ------- - Imax - xr.DataArray giving the maximal current capacity per timestep in Ampere. - + xr.DataArray or numpy.ndarray + Maximum current per time step in ampere. """ Ta = ds["temperature"] Tfilm = (Ta + Ts) / 2 @@ -1326,13 +1563,26 @@ def line_rating( data = cutout.data.stack(spatial=["y", "x"]) def get_azimuth(shape): + """ + Return the line azimuth in degrees from its end points. + + Parameters + ---------- + shape : shapely.geometry.base.BaseGeometry + Line geometry. + + Returns + ------- + float + Azimuth angle in degrees computed from the line end points. 
+ """ coords = np.array(shape.coords) start = coords[0] end = coords[-1] - return np.arctan2(start[0] - end[0], start[1] - end[1]) + return np.degrees(np.arctan2(start[0] - end[0], start[1] - end[1])) azimuth = shapes.apply(get_azimuth) - azimuth = azimuth.where(azimuth >= 0, azimuth + np.pi) + azimuth = azimuth.where(azimuth >= 0, azimuth + 180.0) params.setdefault("D", 0.028) params.setdefault("Ts", 373) diff --git a/atlite/data.py b/atlite/data.py index fece0cf6..769d65ad 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -34,10 +34,29 @@ def get_features( concurrent_requests=False, ): """ - Load the feature data for a given module. + Load feature datasets for a cutout module. - This get the data for a set of features from a module. All modules - in `atlite.datasets` are allowed. + Parameters + ---------- + cutout : atlite.Cutout + Cutout for which data is retrieved. + module : str + Name of the dataset module. + features : iterable of str + Feature names to retrieve from the module. + data_format : str + Data format requested from the dataset backend. + tmpdir : str or pathlib.Path, optional + Directory for intermediate files. + monthly_requests : bool, optional + Whether to split requests into monthly chunks. + concurrent_requests : bool, optional + Whether to issue monthly requests concurrently. + + Returns + ------- + xarray.Dataset + Merged dataset containing the requested features. """ parameters = cutout.data.attrs lock = SerializableLock() @@ -106,13 +125,36 @@ def available_features(module=None): def non_bool_dict(d): """ - Convert bool to int for netCDF4 storing. + Convert boolean dictionary values to integers. + + Parameters + ---------- + d : dict + Dictionary to convert. + + Returns + ------- + dict + Dictionary with boolean values replaced by ``0`` or ``1``. 
""" return {k: v if not isinstance(v, bool) else int(v) for k, v in d.items()} def maybe_remove_tmpdir(func): - """Use this wrapper to make tempfile deletion compatible with windows machines.""" + """ + Wrap a function to manage a temporary directory. + + Parameters + ---------- + func : callable + Function accepting a ``tmpdir`` keyword argument. + + Returns + ------- + callable + Wrapped function that creates and removes a temporary directory when + ``tmpdir`` is not provided. + """ @wraps(func) def wrapper(*args, **kwargs): diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 0e95b4f2..2c70828e 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -30,6 +30,20 @@ def rename_and_clean_coords(ds): + """ + Rename CORDEX grid coordinates and drop unused metadata. + + Parameters + ---------- + ds : xarray.Dataset + Input CORDEX dataset. + + Returns + ------- + xarray.Dataset + Dataset with ``rlon`` and ``rlat`` renamed to ``x`` and ``y`` and + unused coordinates or variables removed. + """ ds = ds.rename({"rlon": "x", "rlat": "y"}) # drop some coordinates and variables we do not use ds = ds.drop( @@ -39,6 +53,31 @@ def rename_and_clean_coords(ds): def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): + """ + Prepare time-varying CORDEX data for selected months. + + Parameters + ---------- + fn : str or path-like + Source file path. + year : int + Target year. + months : list[int] + Months to extract from the file. + oldname : str + Original variable name in the source dataset. + newname : str + Target variable name. + xs : slice or array-like + X-coordinate selection. + ys : slice or array-like + Y-coordinate selection. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + Each requested year-month together with the prepared dataset slice. 
+ """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -61,6 +100,31 @@ def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): + """ + Prepare static CORDEX data for selected months. + + Parameters + ---------- + fn : str or path-like + Source file path. + year : int + Target year. + months : list[int] + Months to associate with the static data. + oldname : str + Original variable name in the source dataset. + newname : str + Target variable name. + xs : slice or array-like + X-coordinate selection. + ys : slice or array-like + Y-coordinate selection. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + Each requested year-month together with the static dataset. + """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -71,6 +135,30 @@ def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): + """ + Prepare monthly CORDEX weather type slices. + + Parameters + ---------- + fn : str or path-like + Source file path. + year : int + Target year. + months : list[int] + Months to extract from the file. + oldname : str + Original variable name in the source dataset. + newname : str + Target variable name. + xs, ys : slice or array-like + Unused placeholders kept for API compatibility with other CORDEX + preparation helpers. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + Each requested year-month together with the renamed dataset slice. 
+ """ with xr.open_dataset(fn) as ds: ds = ds.rename({oldname: newname}) for m in months: @@ -80,6 +168,33 @@ def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): def prepare_meta_cordex( xs, ys, year, month, template, height_config, module, model=model ): + """ + Prepare CORDEX metadata for a cutout month. + + Parameters + ---------- + xs : slice or array-like + X-coordinate selection. + ys : slice or array-like + Y-coordinate selection. + year : int + Target year. + month : int + Target month. + template : str + File name template for the reference dataset. + height_config : dict + Height dataset configuration. + module : module + Dataset module namespace. + model : str, default ``model`` + CORDEX model identifier. + + Returns + ------- + xarray.Dataset + Metadata dataset with spatial, temporal, and height information. + """ fn = next(glob.iglob(template.format(year=year, model=model))) with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) @@ -105,6 +220,33 @@ def prepare_meta_cordex( def tasks_yearly_cordex( xs, ys, yearmonths, prepare_func, template, oldname, newname, meta_attrs ): + """ + Create yearly CORDEX preparation task specifications. + + Parameters + ---------- + xs : slice or array-like + X-coordinate selection. + ys : slice or array-like + Y-coordinate selection. + yearmonths : list[tuple[int, int]] + Requested year-month pairs. + prepare_func : callable + Preparation function to execute for each task. + template : str + File name template for yearly input files. + oldname : str + Original variable name in the source dataset. + newname : str + Target variable name. + meta_attrs : dict + Metadata attributes containing the CORDEX model identifier. + + Returns + ------- + list[dict] + Task dictionaries grouped by year. 
+ """ model = meta_attrs["model"] if not isinstance(xs, slice): diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index f2f957a7..26363636 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -103,7 +103,17 @@ def _rename_and_clean_coords(ds, add_lon_lat=True): def get_data_wind(retrieval_params): """ - Get wind data for given retrieval parameters. + Retrieve and derive wind variables from ERA5 data. + + Parameters + ---------- + retrieval_params : dict + Parameters passed to :func:`retrieve_data`. + + Returns + ------- + xarray.Dataset + Dataset containing wind speed, shear exponent, azimuth, and roughness. """ ds = retrieve_data( variable=[ @@ -137,7 +147,17 @@ def get_data_wind(retrieval_params): def sanitize_wind(ds): """ - Sanitize retrieved wind data. + Sanitize wind data variables. + + Parameters + ---------- + ds : xarray.Dataset + Retrieved wind dataset. + + Returns + ------- + xarray.Dataset + Dataset with non-physical roughness values replaced. """ ds["roughness"] = ds["roughness"].where(ds["roughness"] >= 0.0, 2e-4) return ds @@ -145,7 +165,18 @@ def sanitize_wind(ds): def get_data_influx(retrieval_params): """ - Get influx data for given retrieval parameters. + Retrieve and derive solar influx variables from ERA5 data. + + Parameters + ---------- + retrieval_params : dict + Parameters passed to :func:`retrieve_data`. + + Returns + ------- + xarray.Dataset + Dataset containing direct, diffuse, and top-of-atmosphere influx, + albedo, and solar position variables. """ ds = retrieve_data( variable=[ @@ -192,7 +223,17 @@ def get_data_influx(retrieval_params): def sanitize_influx(ds): """ - Sanitize retrieved influx data. + Sanitize solar influx data. + + Parameters + ---------- + ds : xarray.Dataset + Retrieved influx dataset. + + Returns + ------- + xarray.Dataset + Dataset with negative influx values clipped to zero. 
""" for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a].clip(min=0.0) @@ -201,7 +242,17 @@ def sanitize_influx(ds): def get_data_temperature(retrieval_params): """ - Get wind temperature for given retrieval parameters. + Retrieve temperature-related ERA5 variables. + + Parameters + ---------- + retrieval_params : dict + Parameters passed to :func:`retrieve_data`. + + Returns + ------- + xarray.Dataset + Dataset containing air, soil, and dewpoint temperature variables. """ ds = retrieve_data( variable=[ @@ -226,7 +277,17 @@ def get_data_temperature(retrieval_params): def get_data_runoff(retrieval_params): """ - Get runoff data for given retrieval parameters. + Retrieve runoff data from ERA5. + + Parameters + ---------- + retrieval_params : dict + Parameters passed to :func:`retrieve_data`. + + Returns + ------- + xarray.Dataset + Dataset containing runoff values. """ ds = retrieve_data(variable=["runoff"], **retrieval_params) @@ -238,7 +299,17 @@ def get_data_runoff(retrieval_params): def sanitize_runoff(ds): """ - Sanitize retrieved runoff data. + Sanitize runoff data. + + Parameters + ---------- + ds : xarray.Dataset + Retrieved runoff dataset. + + Returns + ------- + xarray.Dataset + Dataset with negative runoff values clipped to zero. """ ds["runoff"] = ds["runoff"].clip(min=0.0) return ds @@ -246,7 +317,17 @@ def sanitize_runoff(ds): def get_data_height(retrieval_params): """ - Get height data for given retrieval parameters. + Retrieve geopotential height data from ERA5. + + Parameters + ---------- + retrieval_params : dict + Parameters passed to :func:`retrieve_data`. + + Returns + ------- + xarray.Dataset + Dataset containing surface height derived from geopotential. """ ds = retrieve_data(variable="geopotential", **retrieval_params) @@ -322,7 +403,16 @@ def retrieval_times(coords, static=False, monthly_requests=False): def noisy_unlink(path): """ - Delete file at given path. + Delete a file and log failures. 
+ + Parameters ---------- + path : str | Path + File path to delete. + + Returns + ------- + None """ logger.debug(f"Deleting file {path}") try: @@ -332,11 +422,41 @@ def noisy_unlink(path): def add_finalizer(ds: xr.Dataset, target: str | Path): + """ + Schedule deletion of a temporary file when the dataset's store is garbage collected. + + Parameters + ---------- + ds : xarray.Dataset + Dataset associated with the temporary file. + target : str | Path + Path to the temporary file. + + Returns + ------- + None + """ logger.debug(f"Adding finalizer for {target}") weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) def sanitize_chunks(chunks, **dim_mapping): + """ + Map internal chunk dimension names to ERA5 dataset dimensions. + + Parameters + ---------- + chunks : dict or Any + Chunk specification passed to xarray. + **dim_mapping + Additional or overriding mappings from internal to external dimension names. + + Returns + ------- + dict or Any + Chunk mapping with renamed dimensions, or ``chunks`` unchanged if it is + not a dict. + """ dim_mapping = dict(time="valid_time", x="longitude", y="latitude") | dim_mapping if not isinstance(chunks, dict): # preserve "auto" or None @@ -353,25 +473,23 @@ def open_with_grib_conventions( grib_file: str | Path, chunks=None, tmpdir: str | Path | None = None ) -> xr.Dataset: """ - Convert grib file of ERA5 data from the CDS to netcdf file. - - The function does the same thing as the CDS backend does, but locally. - This is needed, as the grib file is the recommended download file type for CDS, with conversion to netcdf locally. - The routine is a reduced version based on the documentation here: - https://confluence.ecmwf.int/display/CKB/GRIB+to+netCDF+conversion+on+new+CDS+and+ADS+systems#GRIBtonetCDFconversiononnewCDSandADSsystems-jupiternotebook + Open a CDS GRIB file using ERA5-compatible conventions. Parameters ---------- grib_file : str | Path - Path to the grib file to be converted.
- chunks - Chunks - tmpdir : Path, optional - If None adds a finalizer to the dataset object + Path to the GRIB file. + chunks : dict, optional + Chunk specification passed to xarray. + tmpdir : str | Path, optional + Temporary directory. If ``None``, the source file is removed when the + dataset is closed. Returns ------- - xr.Dataset + xarray.Dataset + Dataset with renamed variables and expanded dimensions matching CDS + netCDF conventions. """ # # Open grib file as dataset @@ -398,8 +516,19 @@ def open_with_grib_conventions( def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Dataset: """ - Expand dimensions in an xarray dataset, ensuring that the new dimensions are not already in the dataset - and that the order of dimensions is preserved. + Expand missing dimensions while preserving dimension order. + + Parameters + ---------- + dataset : xarray.Dataset + Dataset to expand. + expand_dims : list of str + Dimensions that should exist in the dataset. + + Returns + ------- + xarray.Dataset + Dataset with missing dimensions inserted in a stable order. """ dims_required = [ c for c in dataset.coords if c in expand_dims + list(dataset.dims) @@ -437,45 +566,26 @@ def retrieve_data( **updates, ) -> xr.Dataset: """ - Download data like ERA5 from the Climate Data Store (CDS). - - If you want to track the state of your request go to - https://cds-beta.climate.copernicus.eu/requests?tab=all + Retrieve ERA5 data from the Climate Data Store. Parameters ---------- product : str - Product name, e.g. 'reanalysis-era5-single-levels'. + CDS product name. chunks : dict, optional - Chunking for xarray dataset, e.g. {'time': 1, 'x': 100, 'y': 100}. - Default is None. - tmpdir : str, optional - Directory where the downloaded data is temporarily stored. - Default is None, which uses the system's temporary directory. + Chunk specification passed to xarray. + tmpdir : str | Path, optional + Directory used for temporary downloads. 
lock : dask.utils.SerializableLock, optional - Lock for thread-safe file writing. Default is None. - updates : dict - Additional parameters for the request. - Must include 'year', 'month', and 'variable'. - Can include e.g. 'data_format'. + Lock used while writing downloaded files. + **updates + Additional CDS request parameters. Must include ``year``, ``month``, + and ``variable``. Returns ------- xarray.Dataset - Dataset with the retrieved variables. - - Examples - -------- - >>> ds = retrieve_data( - ... product='reanalysis-era5-single-levels', - ... chunks={'time': 1, 'x': 100, 'y': 100}, - ... tmpdir='/tmp', - ... lock=None, - ... year='2020', - ... month='01', - ... variable=['10m_u_component_of_wind', '10m_v_component_of_wind'], - ... data_format='netcdf' - ... ) + Dataset containing the requested variables. """ request = {"product_type": ["reanalysis"], "download_format": "unarchived"} request.update(updates) @@ -528,38 +638,31 @@ def get_data( **creation_parameters, ): """ - Retrieve data from ECMWFs ERA5 dataset (via CDS). - - This front-end function downloads data for a specific feature and formats - it to match the given Cutout. + Retrieve and format ERA5 data for a cutout feature. Parameters ---------- cutout : atlite.Cutout + Cutout defining the requested spatiotemporal domain. feature : str - Name of the feature data to retrieve. Must be in - `atlite.datasets.era5.features` - tmpdir : str/Path - Directory where the temporary netcdf files are stored. - monthly_requests : bool, optional - If True, the data is requested on a monthly basis in ERA5. This is useful for - large cutouts, where the data is requested in smaller chunks. The - default is False + Feature name defined in :data:`atlite.datasets.era5.features`. + tmpdir : str | Path + Directory used for temporary files. + lock : dask.utils.SerializableLock, optional + Lock used while writing downloaded files. data_format : str, optional - The format of the data to be downloaded. 
Can be either 'grib' or 'netcdf', - 'grib' highly recommended because CDSAPI limits request size for netcdf. + Download format, typically ``"grib"`` or ``"netcdf"``. + monthly_requests : bool, optional + Whether to split requests by month. concurrent_requests : bool, optional - If True, the monthly data requests are posted concurrently. - Only has an effect if `monthly_requests` is True. - **creation_parameters : - Additional keyword arguments. The only effective argument is 'sanitize' - (default True) which sets sanitization of the data on or off. + Whether monthly requests should be submitted concurrently. + **creation_parameters + Additional creation options. Supports ``sanitize``. Returns ------- xarray.Dataset - Dataset of dask arrays of the retrieved variables. - + Dataset containing the requested feature variables. """ coords = cutout.coords @@ -581,6 +684,19 @@ def get_data( logger.info(f"Requesting data for feature {feature}...") def retrieve_once(time): + """ + Retrieve and optionally sanitize one temporal ERA5 request. + + Parameters + ---------- + time : dict + Time selection arguments for a single CDS request. + + Returns + ------- + xarray.Dataset + Retrieved dataset for the requested time slice. + """ ds = func({**retrieval_params, **time}) if sanitize and sanitize_func is not None: ds = sanitize_func(ds) diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 948e862c..31ce30b8 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -21,6 +21,23 @@ def get_data_gebco_height(xs, ys, gebco_path): + """ + Load GEBCO height data for a target grid. + + Parameters + ---------- + xs : xarray.DataArray + X coordinates of the target grid. + ys : xarray.DataArray + Y coordinates of the target grid. + gebco_path : str or path-like + Path to the GEBCO raster file. + + Returns + ------- + xarray.DataArray + Height data on the target grid. 
+ """ x, X = xs.data[[0, -1]] y, Y = ys.data[[0, -1]] diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 115e4d24..8e9d91d1 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -24,6 +24,24 @@ def convert_lons_lats_ncep(ds, xs, ys): + """ + Subset and rename NCEP longitude and latitude coordinates. + + Parameters + ---------- + ds : xarray.Dataset + Input dataset with ``lon_0`` and ``lat_0`` coordinates. + xs : slice or array-like + Longitude selection in degrees east. + ys : slice or array-like + Latitude selection in degrees north. + + Returns + ------- + xarray.Dataset + Dataset restricted to the requested area with coordinates renamed to + ``x`` and ``y`` and duplicated as ``lon`` and ``lat``. + """ if not isinstance(xs, slice): first, second, last = np.asarray(xs)[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) @@ -55,6 +73,21 @@ def convert_lons_lats_ncep(ds, xs, ys): def convert_time_hourly_ncep(ds, drop_time_vars=True): + """ + Convert NCEP time coordinates to a single hourly time index. + + Parameters + ---------- + ds : xarray.Dataset + Input dataset with ``initial_time0_hours`` and ``forecast_time0``. + drop_time_vars : bool, default True + Whether to drop auxiliary initial time variables after stacking. + + Returns + ------- + xarray.Dataset + Dataset with a stacked ``time`` coordinate. + """ # Combine initial_time0 and forecast_time0 ds = ds.stack(time=("initial_time0_hours", "forecast_time0")).assign_coords( time=np.ravel( @@ -69,6 +102,19 @@ def convert_time_hourly_ncep(ds, drop_time_vars=True): def convert_unaverage_ncep(ds): + """ + Convert averaged NCEP variables to per-step values. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing variables with names ending in ``_avg``. + + Returns + ------- + xarray.Dataset + Dataset with unaveraged variables stored without the ``_avg`` suffix. 
+ """ # the fields ending in _avg contain averages which have to be unaveraged by using # \begin{equation} # \tilde x_1 = x_1 \quad \tilde x_i = i \cdot x_i - (i - 1) \cdot x_{i-1} \quad \forall i > 1 @@ -90,6 +136,19 @@ def unaverage(da, dim="forecast_time0"): def convert_unaccumulate_ncep(ds): + """ + Convert accumulated NCEP variables to incremental values. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing variables with names ending in ``_acc``. + + Returns + ------- + xarray.Dataset + Dataset with unaccumulated variables stored without the ``_acc`` suffix. + """ # the fields ending in _acc contain values that are accumulated over the # forecast_time which have to be unaccumulated by using: # \begin{equation} @@ -121,6 +180,27 @@ def convert_clip_lower(ds, variable, a_min, value): def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly 10 m wind speed data from NCEP. + + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing ``wnd10m``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -132,6 +212,27 @@ def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_influx_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly downward shortwave flux data from NCEP. + + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. 
+ + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing ``influx``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) @@ -144,6 +245,27 @@ def prepare_influx_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_outflux_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly upward shortwave flux data from NCEP. + + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing ``outflux``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) @@ -156,6 +278,27 @@ def prepare_outflux_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly near-surface temperature data from NCEP. + + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing ``temperature``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -165,6 +308,28 @@ def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly soil temperature data from NCEP. 
+ + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing + ``soil temperature``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -174,6 +339,27 @@ def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly runoff data from NCEP. + + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing ``runoff``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) # runoff has missing values: set nans to 0 @@ -186,6 +372,27 @@ def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): + """ + Prepare static height data from NCEP for multiple months. + + Parameters + ---------- + fn : str or path-like + Source file path. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + yearmonths : list[tuple[int, int]] + Year-month pairs to attach to the static dataset. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + Each requested year-month together with a dataset containing ``height``. 
+ """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"HGT_P0_L105_GGA0": "height"}) @@ -194,6 +401,27 @@ def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): + """ + Prepare monthly surface roughness data from NCEP. + + Parameters + ---------- + fn : str or path-like + Source file path. + yearmonth : tuple[int, int] + Target year and month. + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + engine : str, default ``engine`` + Xarray backend engine. + + Yields + ------ + tuple[tuple[int, int], xarray.Dataset] + The selected month together with a dataset containing ``roughness``. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"SFCR_P8_L1_GGA0": "roughness"}) @@ -204,6 +432,33 @@ def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_meta_ncep( xs, ys, year, month, template, height_config, module, engine=engine ): + """ + Prepare NCEP metadata for a cutout month. + + Parameters + ---------- + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + year : int + Target year. + month : int + Target month. + template : str + File name template for the reference dataset. + height_config : dict + Height dataset configuration. + module : module + Dataset module namespace. + engine : str, default ``engine`` + Xarray backend engine. + + Returns + ------- + xarray.Dataset + Metadata dataset with spatial, temporal, and height information. + """ fn = next(glob.iglob(template.format(year=year, month=month))) with xr.open_dataset(fn, engine=engine) as ds: ds = ds.coords.to_dataset() @@ -228,6 +483,29 @@ def prepare_meta_ncep( def tasks_monthly_ncep(xs, ys, yearmonths, prepare_func, template, meta_attrs): + """ + Create monthly NCEP preparation task specifications. 
+ + Parameters + ---------- + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + yearmonths : list[tuple[int, int]] + Requested year-month pairs. + prepare_func : callable + Preparation function to execute for each task. + template : str + File name template for monthly input files. + meta_attrs : dict + Unused metadata attributes. + + Returns + ------- + list[dict] + Task dictionaries for monthly data preparation. + """ return [ dict( prepare_func=prepare_func, @@ -244,6 +522,31 @@ def tasks_monthly_ncep(xs, ys, yearmonths, prepare_func, template, meta_attrs): def tasks_height_ncep( xs, ys, yearmonths, prepare_func, template, meta_attrs, **extra_args ): + """ + Create NCEP task specifications for static height data. + + Parameters + ---------- + xs : slice or array-like + Longitude selection. + ys : slice or array-like + Latitude selection. + yearmonths : list[tuple[int, int]] + Requested year-month pairs. + prepare_func : callable + Preparation function to execute. + template : str + File name template for the static height file. + meta_attrs : dict + Unused metadata attributes. + **extra_args + Additional keyword arguments passed to ``prepare_func``. + + Returns + ------- + list[dict] + A single task dictionary covering all requested months. + """ return [ dict( prepare_func=prepare_func, diff --git a/atlite/gis.py b/atlite/gis.py index 9cf9aa49..63040408 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -35,30 +35,29 @@ def get_coords(x, y, time, dx=0.25, dy=0.25, dt="h", **kwargs): """ - Create an cutout coordinate system on the basis of slices and step sizes. + Create cutout coordinates from slices and resolutions. Parameters ---------- x : slice - Numerical slices with lower and upper bound of the x dimension. + Bounds of the x dimension. y : slice - Numerical slices with lower and upper bound of the y dimension. + Bounds of the y dimension. 
time : slice - Slice with strings with lower and upper bound of the time dimension. + Bounds of the time dimension. dx : float, optional - Step size of the x coordinate. The default is 0.25. + Step size of the x coordinate. dy : float, optional - Step size of the y coordinate. The default is 0.25. + Step size of the y coordinate. dt : str, optional - Frequency of the time coordinate. The default is 'h'. Valid are all - pandas offset aliases. + Frequency of the time coordinate. + **kwargs + Unused keyword arguments. Returns ------- - ds : xarray.Dataset - Dataset with x, y and time variables, representing the whole coordinate - system. - + xarray.Dataset + Dataset containing ``x``, ``y``, and ``time`` coordinates. """ x = slice(*sorted([x.start, x.stop])) y = slice(*sorted([y.start, y.stop])) @@ -77,7 +76,17 @@ def get_coords(x, y, time, dx=0.25, dy=0.25, dt="h", **kwargs): def spdiag(v): """ - Create a sparse diagonal matrix from a 1-dimensional array. + Create a sparse diagonal matrix. + + Parameters + ---------- + v : array-like + Values placed on the diagonal. + + Returns + ------- + scipy.sparse.csr_matrix + Sparse diagonal matrix with ``v`` on the diagonal. """ N = len(v) inds = np.arange(N + 1, dtype=np.int32) @@ -86,7 +95,21 @@ def spdiag(v): def reproject_shapes(shapes, crs1, crs2): """ - Project a collection of shapes from one crs to another. + Reproject a collection of geometries. + + Parameters + ---------- + shapes : iterable, pandas.Series, or dict + Shapes to reproject. + crs1 : any + Source coordinate reference system. + crs2 : any + Target coordinate reference system. + + Returns + ------- + iterable, pandas.Series, or collections.OrderedDict + Reprojected shapes with the same container type where applicable. 
""" transformer = Transformer.from_crs(crs1, crs2, always_xy=True) @@ -185,8 +208,19 @@ def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): def padded_transform_and_shape(bounds, res): """ - Get the (transform, shape) tuple of a raster with resolution `res` and - bounds `bounds`. + Return a padded raster transform and shape. + + Parameters + ---------- + bounds : tuple + Bounding box as ``(left, bottom, right, top)``. + res : float + Raster resolution. + + Returns + ------- + tuple + Affine transform and raster shape covering the padded bounds. """ left, bottom = ((b // res) * res for b in bounds[:2]) right, top = ((b // res + 1) * res for b in bounds[2:]) @@ -198,7 +232,29 @@ def projected_mask( raster, geom, transform=None, shape=None, crs=None, allow_no_overlap=False, **kwargs ): """ - Load a mask and optionally project it to target resolution and shape. + Load a raster mask and optionally reproject it. + + Parameters + ---------- + raster : rasterio.DatasetReader + Raster source used to build the mask. + geom : geopandas.GeoSeries + Geometry used for masking. + transform : rasterio.Affine, optional + Target transform. + shape : tuple, optional + Target array shape. + crs : any, optional + Target coordinate reference system. + allow_no_overlap : bool, optional + Whether to return a nodata mask when geometry and raster do not overlap. + **kwargs + Additional keyword arguments passed to ``rasterio.mask.mask``. + + Returns + ------- + tuple + Masked array and its affine transform. """ nodata = kwargs.get("nodata", 255) kwargs.setdefault("indexes", 1) @@ -232,12 +288,27 @@ def projected_mask( def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): """ - Pad the extent of `src` by an equivalent of one cell of the target raster. + Pad an array before reprojection. - This ensures that the array is large enough to not be treated as - nodata in all cells of the destination raster. 
If src.ndim > 2, the - function expects the last two dimensions to be y,x. Additional - keyword arguments are used in `np.pad()`. + Parameters + ---------- + src : numpy.ndarray + Source array with spatial axes in the last two dimensions. + src_transform : rasterio.Affine + Transform of the source array. + dst_transform : rasterio.Affine + Transform of the destination raster. + src_crs : any + Source coordinate reference system. + dst_crs : any + Destination coordinate reference system. + **kwargs + Keyword arguments passed to ``numpy.pad``. + + Returns + ------- + tuple + Padded array and updated affine transform. """ if src.size == 0: return src, src_transform @@ -329,34 +400,25 @@ def shape_availability_reprojected( geometry, excluder, dst_transform, dst_crs, dst_shape ): """ - Compute and reproject the eligible area of one or more geometries. - - The function executes `shape_availability` and reprojects the calculated - mask onto a new raster defined by (dst_transform, dst_crs, dst_shape). - Before reprojecting, the function pads the mask such all non-nodata data - points are projected in full cells of the target raster. The ensures that - all data within the mask are projected correclty (GDAL inherent 'problem'). + Compute availability and reproject it to a target raster. + Parameters ---------- - geometry : geopandas.Series - Geometry in which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is - computed. + geometry : geopandas.GeoSeries + Geometry for which availability is computed. excluder : atlite.gis.ExclusionContainer - Container of all meta data or objects which to exclude, i.e. - rasters and geometries. + Exclusion container defining masked areas. dst_transform : rasterio.Affine Transform of the target raster. - dst_crs : rasterio.CRS/proj.CRS - CRS of the target raster. + dst_crs : any + Coordinate reference system of the target raster. 
dst_shape : tuple Shape of the target raster. - masked : np.array - Average share of available area per grid cell. 0 indicates excluded, - 1 is fully included. - transform : rasterio.Affine - Affine transform of the mask. + Returns + ------- + tuple + Reprojected availability array and destination transform. """ masked, transform = shape_availability(geometry, excluder) masked, transform = pad_extent( @@ -759,7 +821,21 @@ def compute_availabilitymatrix( def maybe_swap_spatial_dims(ds, namex="x", namey="y"): """ - Swap order of spatial dimensions according to atlite concention. + Ensure spatial coordinates follow atlite's axis ordering. + + Parameters + ---------- + ds : xarray.Dataset or xarray.DataArray + Object with spatial coordinates. + namex : str, optional + Name of the x dimension. + namey : str, optional + Name of the y dimension. + + Returns + ------- + xarray.Dataset or xarray.DataArray + Input object with spatial dimensions reversed if needed. """ swaps = {} lx, rx = ds.indexes[namex][[0, -1]] @@ -785,26 +861,23 @@ def _as_transform(x, y): def regrid(ds, dimx, dimy, **kwargs): """ - Interpolate Dataset or DataArray `ds` to a new grid, using rasterio's - reproject facility. - - See also: https://mapbox.github.io/rasterio/topics/resampling.html + Reproject data to a new spatial grid. Parameters ---------- - ds : xr.Dataset|xr.DataArray - N-dim data on a spatial grid - dimx : pd.Index - New x-coordinates in destination crs. - dimx.name MUST refer to x-coord of ds. - dimy : pd.Index - New y-coordinates in destination crs. - dimy.name MUST refer to y-coord of ds. - **kwargs : - Arguments passed to rio.wrap.reproject; of note: - - resampling is one of gis.Resampling.{average,cubic,bilinear,nearest} - - src_crs, dst_crs define the different crs (default: EPSG 4326, ie latlong) + ds : xarray.Dataset or xarray.DataArray + Data on a spatial grid. + dimx : pandas.Index + Target x coordinates. ``dimx.name`` must match the source x dimension. 
+ dimy : pandas.Index + Target y coordinates. ``dimy.name`` must match the source y dimension. + **kwargs + Keyword arguments passed to ``rasterio.warp.reproject``. + Returns + ------- + xarray.Dataset or xarray.DataArray + Regridded object on the target coordinates. """ namex = dimx.name namey = dimy.name diff --git a/atlite/hydro.py b/atlite/hydro.py index df08aacc..a3734715 100644 --- a/atlite/hydro.py +++ b/atlite/hydro.py @@ -21,6 +21,23 @@ def find_basin(shapes, lon, lat): + """ + Find the basin containing a point. + + Parameters + ---------- + shapes : geopandas.GeoSeries + Basin geometries indexed by basin id. + lon : float + Longitude of the point. + lat : float + Latitude of the point. + + Returns + ------- + int + Basin id containing the point. + """ hids = shapes.index[shapes.intersects(Point(lon, lat))] if len(hids) > 1: logger.warning( @@ -31,6 +48,21 @@ def find_basin(shapes, lon, lat): def find_upstream_basins(meta, hid): + """ + Collect all upstream basins of a basin. + + Parameters + ---------- + meta : pandas.DataFrame + Basin metadata with a ``NEXT_DOWN`` column. + hid : int + Basin id from which to start. + + Returns + ------- + list[int] + Basin ids including the selected basin and all upstream basins. + """ hids = [hid] i = 0 while i < len(hids): @@ -40,6 +72,23 @@ def find_upstream_basins(meta, hid): def determine_basins(plants, hydrobasins, show_progress=False): + """ + Determine local and upstream basins for hydro plants. + + Parameters + ---------- + plants : pandas.DataFrame + Plant table with ``lon`` and ``lat`` columns. + hydrobasins : str or geopandas.GeoDataFrame + HydroBASINS data source or loaded basin geometries. + show_progress : bool, default False + Whether to show a progress bar. + + Returns + ------- + Basins + Basin assignments, metadata, and geometries for the plants. 
+ """ if isinstance(hydrobasins, str): hydrobasins = gpd.read_file(hydrobasins) @@ -81,6 +130,25 @@ def determine_basins(plants, hydrobasins, show_progress=False): def shift_and_aggregate_runoff_for_plants( basins, runoff, flowspeed=1, show_progress=False ): + """ + Shift basin runoff in time and aggregate it per plant. + + Parameters + ---------- + basins : Basins + Basin mappings and metadata for the plants. + runoff : xarray.DataArray + Runoff time series indexed by ``hid`` and ``time``. + flowspeed : float, default 1 + Flow speed in m/s used to convert distance to travel time. + show_progress : bool, default False + Whether to show a progress bar. + + Returns + ------- + xarray.DataArray + Plant inflow time series indexed by ``plant`` and ``time``. + """ inflow = xr.DataArray( np.zeros((len(basins.plants), runoff.indexes["time"].size)), [("plant", basins.plants.index), runoff.coords["time"]], diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 8275d58f..f254e6f7 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -11,6 +11,27 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): + """ + Estimate diffuse horizontal irradiation from total horizontal irradiation. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing top-of-atmosphere irradiation and, for the enhanced + model, temperature and humidity. + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + clearsky_model : str or None + Reindl clearsky model to use, either ``"simple"`` or ``"enhanced"``. + If None, the model is chosen from the available data. + influx : xarray.DataArray + Total horizontal irradiation. + + Returns + ------- + xarray.DataArray + Diffuse horizontal irradiation. + """ # Clearsky model from Reindl 1990 to split downward radiation into direct # and diffuse contributions. Should switch to more up-to-date model, f.ex. # Ridley et al. 
(2010) http://dx.doi.org/10.1016/j.renene.2009.07.018 , @@ -74,6 +95,27 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse): + """ + Calculate diffuse irradiation on a tilted surface. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing top-of-atmosphere irradiation. + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + surface_orientation : xarray.Dataset + Surface orientation including ``cosincidence`` and ``slope``. + direct : xarray.DataArray + Direct horizontal irradiation. + diffuse : xarray.DataArray + Diffuse horizontal irradiation. + + Returns + ------- + xarray.DataArray + Diffuse tilted irradiation. + """ # Hay-Davies Model sinaltitude = sin(solar_position["altitude"]) @@ -116,6 +158,23 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) def TiltedDirectIrrad(solar_position, surface_orientation, direct): + """ + Calculate direct irradiation on a tilted surface. + + Parameters + ---------- + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + surface_orientation : xarray.Dataset + Surface orientation including ``cosincidence``. + direct : xarray.DataArray + Direct horizontal irradiation. + + Returns + ------- + xarray.DataArray + Direct tilted irradiation. + """ sinaltitude = sin(solar_position["altitude"]) cosincidence = surface_orientation["cosincidence"] @@ -126,6 +185,21 @@ def TiltedDirectIrrad(solar_position, surface_orientation, direct): def _albedo(ds, influx): + """ + Retrieve or derive surface albedo from the dataset. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing either ``albedo`` or ``outflux``. + influx : xarray.DataArray + Downward surface irradiation used when deriving albedo from outflux. + + Returns + ------- + xarray.DataArray + Surface albedo. 
+ """ if "albedo" in ds: albedo = ds["albedo"] elif "outflux" in ds: @@ -140,6 +214,25 @@ def _albedo(ds, influx): def TiltedGroundIrrad(ds, solar_position, surface_orientation, influx): + """ + Calculate ground-reflected irradiation on a tilted surface. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing albedo information or reflected outflux. + solar_position : xarray.Dataset + Solar position dataset. + surface_orientation : xarray.Dataset + Surface orientation including ``slope``. + influx : xarray.DataArray + Total horizontal irradiation. + + Returns + ------- + xarray.DataArray + Ground-reflected tilted irradiation. + """ surface_slope = surface_orientation["slope"] ground_t = influx * _albedo(ds, influx) * (1.0 - cos(surface_slope)) / 2.0 return ground_t.rename("ground tilted") @@ -196,6 +289,21 @@ def TiltedIrradiation( influx_toa = ds["influx_toa"] def clip(influx, influx_max): + """ + Clip irradiation to physically admissible bounds. + + Parameters + ---------- + influx : xarray.DataArray + Irradiation to clip. + influx_max : xarray.DataArray + Upper bound for the irradiation. + + Returns + ------- + xarray.DataArray + Clipped irradiation. + """ # use .data in clip due to dask-xarray incompatibilities return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index d5240546..fb4f9e08 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -12,10 +12,19 @@ def get_orientation(name, **params): """ - Definitions: - -`slope` is the angle between ground and panel. - -`azimuth` is the clockwise angle from North. - i.e. azimuth = 180 faces exactly South + Return an orientation factory by name. + + Parameters + ---------- + name : str or dict + Orientation name or parameter dictionary containing ``name``. + **params + Parameters passed to the orientation factory. 
+ + Returns + ------- + callable + Orientation function returning ``slope`` and ``azimuth``. """ if isinstance(name, dict): params = name @@ -48,6 +57,23 @@ def make_latitude_optimal(): """ def latitude_optimal(lon, lat, solar_position): + """ + Build an orientation with latitude-dependent optimal tilt. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with ``slope`` and ``azimuth``. + """ slope = np.empty_like(lat.values) below_25 = np.abs(lat.values) <= np.radians(25) @@ -70,19 +96,81 @@ def latitude_optimal(lon, lat, solar_position): def make_constant(slope, azimuth): + """ + Create an orientation function with constant slope and azimuth. + + Parameters + ---------- + slope : float + Surface slope in degrees. + azimuth : float + Surface azimuth in degrees clockwise from north. + + Returns + ------- + callable + Orientation function returning constant ``slope`` and ``azimuth``. + """ slope = radians(slope) azimuth = radians(azimuth) def constant(lon, lat, solar_position): + """ + Return the configured constant panel orientation. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with constant ``slope`` and ``azimuth``. + """ return dict(slope=slope, azimuth=azimuth) return constant def make_latitude(azimuth=180): + """ + Create an orientation function with slope equal to latitude. + + Parameters + ---------- + azimuth : float, default 180 + Surface azimuth in degrees clockwise from north. + + Returns + ------- + callable + Orientation function using latitude as slope. + """ azimuth = radians(azimuth) def latitude(lon, lat, solar_position): + """ + Return an orientation with slope equal to latitude. 
+ + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with latitude-based ``slope`` and constant ``azimuth``. + """ return dict(slope=lat, azimuth=azimuth) return latitude diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index e713635d..eea17406 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -75,6 +75,23 @@ def _power_bofinger(irradiance, t_amb, pc): def SolarPanelModel(ds, irradiance, pc): + """ + Compute PV power output for the selected panel model. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing ambient temperature. + irradiance : xarray.DataArray + Plane-of-array irradiation. + pc : dict + Panel configuration including the model parameters. + + Returns + ------- + xarray.DataArray + Specific PV power output. + """ model = pc.get("model", "huld") if model == "huld": diff --git a/atlite/resource.py b/atlite/resource.py index 0cc0443a..01dae541 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -202,6 +202,19 @@ def get_cspinstallationconfig(installation): def solarpanel_rated_capacity_per_unit(panel): + """ + Return the rated capacity per unit of a solar panel configuration. + + Parameters + ---------- + panel : str or pathlib.Path or dict + Solar panel configuration or reference to one. + + Returns + ------- + float + Rated capacity per unit area or per panel, depending on the model. + """ # unit is m^2 here if isinstance(panel, (str | Path)): @@ -218,6 +231,19 @@ def solarpanel_rated_capacity_per_unit(panel): def windturbine_rated_capacity_per_unit(turbine): + """ + Return the rated capacity of a wind turbine configuration. + + Parameters + ---------- + turbine : str or pathlib.Path or dict + Wind turbine configuration or reference to one. + + Returns + ------- + float + Rated turbine capacity. 
+ """ if isinstance(turbine, (str | Path)): turbine = get_windturbineconfig(turbine) From e0c511c08e58afede285c170725742bced4ab4fd Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 10:04:06 +0100 Subject: [PATCH 03/27] Add mypy type checking with strict settings and fix all type errors across codebase --- .pre-commit-config.yaml | 8 + atlite/__init__.py | 16 +- atlite/_types.py | 136 ++++++++ atlite/aggregate.py | 19 +- atlite/convert.py | 192 +++++++---- atlite/csp.py | 58 ++-- atlite/cutout.py | 111 +++--- atlite/data.py | 143 ++++---- atlite/datasets/__init__.py | 6 +- atlite/datasets/cordex.py | 442 ++++++++++-------------- atlite/datasets/era5.py | 424 ++++------------------- atlite/datasets/gebco.py | 67 +--- atlite/datasets/ncep.py | 611 ++++++++++----------------------- atlite/datasets/sarah.py | 114 ++---- atlite/gis.py | 334 ++++++++++++------ atlite/hydro.py | 44 ++- atlite/pv/__init__.py | 43 ++- atlite/pv/irradiation.py | 122 +++---- atlite/pv/orientation.py | 108 +++--- atlite/pv/solar_panel_model.py | 29 +- atlite/pv/solar_position.py | 21 +- atlite/resource.py | 128 +++---- atlite/utils.py | 45 ++- atlite/wind.py | 35 +- pyproject.toml | 11 +- 25 files changed, 1533 insertions(+), 1734 deletions(-) create mode 100644 atlite/_types.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7cba5223..f3fda553 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,14 @@ repos: # Run the formatter. 
- id: ruff-format +# Type checking with mypy +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.15.0 + hooks: + - id: mypy + additional_dependencies: ['types-PyYAML', 'types-requests'] + exclude: ^(tests/|docs/|examples/) + # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell rev: v2.4.2 diff --git a/atlite/__init__.py b/atlite/__init__.py index d21aaf5c..e05ca56f 100644 --- a/atlite/__init__.py +++ b/atlite/__init__.py @@ -37,12 +37,12 @@ release_version = match.group(0) __all__ = [ - Cutout, - ExclusionContainer, - compute_indicatormatrix, - regrid, - cspinstallations, - solarpanels, - windturbines, - __version__, + "Cutout", + "ExclusionContainer", + "compute_indicatormatrix", + "regrid", + "cspinstallations", + "solarpanels", + "windturbines", + "__version__", ] diff --git a/atlite/_types.py b/atlite/_types.py new file mode 100644 index 00000000..995c1051 --- /dev/null +++ b/atlite/_types.py @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: Contributors to atlite +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +from collections.abc import Callable, Sequence +from pathlib import Path +from typing import Any, Literal, TypeAlias, TypedDict + +import geopandas as gpd +import numpy as np +import scipy.sparse as sp +import xarray as xr +from pyproj import CRS +from shapely.geometry.base import BaseGeometry + +NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] +NDArrayInt: TypeAlias = np.ndarray[Any, np.dtype[np.signedinteger[Any]]] +NDArrayBool: TypeAlias = np.ndarray[Any, np.dtype[np.bool_]] +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset +PathLike: TypeAlias = str | Path +NumericArray: TypeAlias = NDArray | DataArray +Number: TypeAlias = int | float | np.number[Any] +GeoDataFrame: TypeAlias = gpd.GeoDataFrame +GeoSeries: TypeAlias = gpd.GeoSeries +Geometry: TypeAlias = BaseGeometry +CrsLike: TypeAlias = str | int | CRS | dict[str, Any] | 
None +SparseMatrix: TypeAlias = sp.lil_matrix | sp.csr_matrix + + +class CutoutPrepareConfig(TypedDict, total=False): + datasets: list[str] + months: list[int] + start_year: int + end_year: int + + +class DatasetConfig(TypedDict, total=False): + module: str + version: str + years: list[int] + + +class ConversionConfig(TypedDict, total=False): + data_source: str + temperature: bool + wind_speed: bool + solar_irradiance: bool + + +class PVConfig(TypedDict, total=False): + tracking: Literal["fixed", "horizontal", "vertical", "two_axis"] + orientation: Literal["south", "fixed"] + tilt: float | None + azimuth: float | None + racking: Literal[ + "open_rack_cell_glued_back", + "close_mount_cell_glued_back", + "open_rack_polymer_thinfilm_copper_covered_edge", + ] + + +class ERA5RetrievalParams(TypedDict, total=False): + product: str + area: list[float] + grid: str + chunks: dict[str, int] | None + tmpdir: str | Path | None + lock: Any | None + data_format: Literal["grib", "netcdf"] + year: list[str] + month: list[str] | str + day: list[str] | str + time: str | list[str] + variable: str | list[str] + + +class SarahCreationParams(TypedDict, total=False): + sarah_dir: str | Path + parallel: bool + sarah_interpolate: bool + + +class GebcoCreationParams(TypedDict, total=False): + gebco_path: str | Path + + +class TaskDict(TypedDict, total=False): + prepare_func: Callable[..., Any] + xs: Any + ys: Any + yearmonths: list[tuple[int, int]] + fn: str | Path + year: int + month: int | list[int] + yearmonth: tuple[int, int] + engine: str + oldname: str + newname: str + template: str + drop_time_vars: bool + + +class CSPConfig(TypedDict, total=False): + turbine: str + capacity: float + + +class WindConfig(TypedDict, total=False): + turbine: str + capacity: float + hub_height: float | None + + +class LayoutConfig(TypedDict, total=False): + layout: DataArray | None + capacity: float | None + + +class ShapeConfig(TypedDict, total=False): + shapes: Sequence[Geometry] | None + shapes_crs: 
CrsLike + + +class AggregationConfig(TypedDict, total=False): + matrix: SparseMatrix | DataArray | None + index: Any + per_unit: bool + return_capacity: bool + aggregate_time: Literal["sum", "mean"] | bool | None + capacity_factor: bool + capacity_factor_timeseries: bool + show_progress: bool + dask_kwargs: dict[str, Any] diff --git a/atlite/aggregate.py b/atlite/aggregate.py index 3fc40585..c9da58f7 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -5,11 +5,25 @@ Functions for aggregating results. """ +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + import dask +import pandas as pd import xarray as xr +from atlite._types import DataArray + +if TYPE_CHECKING: + from scipy.sparse import spmatrix + -def aggregate_matrix(da, matrix, index): +def aggregate_matrix( + da: DataArray, + matrix: spmatrix, + index: pd.Index, +) -> DataArray: """ Aggregate spatial data with a sparse matrix. @@ -32,7 +46,7 @@ def aggregate_matrix(da, matrix, index): if isinstance(da.data, dask.array.core.Array): da = da.stack(spatial=("y", "x")) da = da.chunk(dict(spatial=-1)) - return xr.apply_ufunc( + result = xr.apply_ufunc( lambda da: da * matrix.T, da, input_core_dims=[["spatial"]], @@ -41,6 +55,7 @@ def aggregate_matrix(da, matrix, index): output_dtypes=[da.dtype], dask_gufunc_kwargs=dict(output_sizes={index.name: index.size}), ).assign_coords(**{index.name: index}) + return cast(DataArray, result) else: da = da.stack(spatial=("y", "x")).transpose("spatial", "time") return xr.DataArray(matrix * da, [index, da.coords["time"]]) diff --git a/atlite/convert.py b/atlite/convert.py index e7a09163..3118a6e1 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -11,9 +11,10 @@ import logging import warnings from collections import namedtuple +from collections.abc import Callable from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal import geopandas as gpd 
import numpy as np @@ -28,6 +29,7 @@ from atlite import csp as cspm from atlite import hydro as hydrom from atlite import wind as windm +from atlite._types import DataArray, Dataset, NumericArray from atlite.aggregate import aggregate_matrix from atlite.gis import spdiag from atlite.pv.irradiation import TiltedIrradiation @@ -44,26 +46,27 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: + from atlite.cutout import Cutout from atlite.resource import TurbineConfig def convert_and_aggregate( - cutout, - convert_func, - matrix=None, - index=None, - layout=None, - shapes=None, - shapes_crs=4326, - per_unit=False, - return_capacity=False, + cutout: Cutout, + convert_func: Callable[..., Any], + matrix: Any = None, + index: Any = None, + layout: Any = None, + shapes: Any = None, + shapes_crs: int = 4326, + per_unit: bool = False, + return_capacity: bool = False, aggregate_time: Literal["sum", "mean", False] | None = None, - capacity_factor=False, - capacity_factor_timeseries=False, - show_progress=False, - dask_kwargs={}, - **convert_kwds, -): + capacity_factor: bool = False, + capacity_factor_timeseries: bool = False, + show_progress: bool = False, + dask_kwargs: dict[str, Any] = {}, + **convert_kwds: Any, +) -> Any: """ Convert and aggregate a weather-based renewable generation time-series. 
@@ -211,7 +214,7 @@ def convert_and_aggregate( ) if isinstance(matrix, xr.DataArray): - coords = matrix.indexes.get(matrix.dims[1]).to_frame(index=False) + coords = matrix.indexes.get(matrix.dims[1]).to_frame(index=False) # type: ignore[union-attr] if not np.array_equal(coords[["x", "y"]], cutout.grid[["x", "y"]]): raise ValueError( "Matrix spatial coordinates not aligned with cutout spatial " @@ -271,7 +274,9 @@ def convert_and_aggregate( return maybe_progressbar(results, show_progress, **dask_kwargs) -def maybe_progressbar(ds, show_progress, **kwargs): +def maybe_progressbar( + ds: Dataset | DataArray, show_progress: bool, **kwargs: Any +) -> Dataset | DataArray: """ Load a dataset or data array, optionally showing a dask progress bar. @@ -298,7 +303,7 @@ def maybe_progressbar(ds, show_progress, **kwargs): # temperature -def convert_temperature(ds): +def convert_temperature(ds: Dataset) -> DataArray: """ Convert ambient air temperature from Kelvin to degrees Celsius. @@ -313,15 +318,15 @@ def convert_temperature(ds): Ambient temperature in degrees Celsius. """ # Temperature is in Kelvin - return ds["temperature"] - 273.15 + return ds["temperature"] - 273.15 # type: ignore[no-any-return] -def temperature(cutout, **params): - return cutout.convert_and_aggregate(convert_func=convert_temperature, **params) +def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + return cutout.convert_and_aggregate(convert_func=convert_temperature, **params) # type: ignore[no-any-return] # soil temperature -def convert_soil_temperature(ds): +def convert_soil_temperature(ds: Dataset) -> DataArray: """ Convert soil temperature from Kelvin to degrees Celsius. 
@@ -340,15 +345,15 @@ def convert_soil_temperature(ds): # There are nans where there is sea; by setting them # to zero we guarantee they do not contribute when multiplied # by matrix in atlite/aggregate.py - return (ds["soil temperature"] - 273.15).fillna(0.0) + return (ds["soil temperature"] - 273.15).fillna(0.0) # type: ignore[no-any-return] -def soil_temperature(cutout, **params): - return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) +def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) # type: ignore[no-any-return] # dewpoint temperature -def convert_dewpoint_temperature(ds): +def convert_dewpoint_temperature(ds: Dataset) -> DataArray: """ Convert dew point temperature from Kelvin to degrees Celsius. @@ -363,16 +368,23 @@ def convert_dewpoint_temperature(ds): Dew point temperature in degrees Celsius. """ # Temperature is in Kelvin - return ds["dewpoint temperature"] - 273.15 + return ds["dewpoint temperature"] - 273.15 # type: ignore[no-any-return] -def dewpoint_temperature(cutout, **params): - return cutout.convert_and_aggregate( +def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + return cutout.convert_and_aggregate( # type: ignore[no-any-return] convert_func=convert_dewpoint_temperature, **params ) -def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): +def convert_coefficient_of_performance( + ds: Dataset, + source: str, + sink_T: float, + c0: float | None, + c1: float | None, + c2: float | None, +) -> DataArray: """ Convert source temperatures to heat pump COP values. 
@@ -416,12 +428,18 @@ def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): delta_T = sink_T - source_T - return c0 + c1 * delta_T + c2 * delta_T**2 + return c0 + c1 * delta_T + c2 * delta_T**2 # type: ignore[no-any-return] def coefficient_of_performance( - cutout, source="air", sink_T=55.0, c0=None, c1=None, c2=None, **params -): + cutout: Cutout, + source: str = "air", + sink_T: float = 55.0, + c0: float | None = None, + c1: float | None = None, + c2: float | None = None, + **params: Any, +) -> DataArray | NumericArray: """ Convert ambient or soil temperature to coefficient of performance (COP) of air- or ground-sourced heat pumps. The COP is a function of temperature @@ -447,7 +465,7 @@ def coefficient_of_performance( Energy & Environmental Science (2012), 5, 9291-9306, https://doi.org/10.1039/C2EE22653G. """ - return cutout.convert_and_aggregate( + return cutout.convert_and_aggregate( # type: ignore[no-any-return] convert_func=convert_coefficient_of_performance, source=source, sink_T=sink_T, @@ -459,7 +477,13 @@ def coefficient_of_performance( # heat demand -def convert_heat_demand(ds, threshold, a, constant, hour_shift): +def convert_heat_demand( + ds: Dataset, + threshold: float, + a: float, + constant: float, + hour_shift: float, +) -> DataArray: """ Convert ambient temperature to daily heat demand by degree days. @@ -493,10 +517,17 @@ def convert_heat_demand(ds, threshold, a, constant, hour_shift): heat_demand = heat_demand.clip(min=0.0) - return (constant + heat_demand).rename("heat_demand") + return (constant + heat_demand).rename("heat_demand") # type: ignore[no-any-return] -def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **params): +def heat_demand( + cutout: Cutout, + threshold: float = 15.0, + a: float = 1.0, + constant: float = 0.0, + hour_shift: float = 0.0, + **params: Any, +) -> DataArray | NumericArray: """ Convert outside temperature into daily heat demand using the degree-day approximation. 
@@ -539,7 +570,7 @@ def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **p documented in the `convert_and_aggregate` function. """ - return cutout.convert_and_aggregate( + return cutout.convert_and_aggregate( # type: ignore[no-any-return] convert_func=convert_heat_demand, threshold=threshold, a=a, @@ -550,7 +581,13 @@ def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **p # cooling demand -def convert_cooling_demand(ds, threshold, a, constant, hour_shift): +def convert_cooling_demand( + ds: Dataset, + threshold: float, + a: float, + constant: float, + hour_shift: float, +) -> DataArray: """ Convert ambient temperature to daily cooling demand by degree days. @@ -584,12 +621,17 @@ def convert_cooling_demand(ds, threshold, a, constant, hour_shift): cooling_demand = cooling_demand.clip(min=0.0) - return (constant + cooling_demand).rename("cooling_demand") + return (constant + cooling_demand).rename("cooling_demand") # type: ignore[no-any-return] def cooling_demand( - cutout, threshold=23.0, a=1.0, constant=0.0, hour_shift=0.0, **params -): + cutout: Cutout, + threshold: float = 23.0, + a: float = 1.0, + constant: float = 0.0, + hour_shift: float = 0.0, + **params: Any, +) -> DataArray | NumericArray: """ Convert outside temperature into daily cooling demand using the degree-day approximation. @@ -635,7 +677,7 @@ def cooling_demand( documented in the `convert_and_aggregate` function. 
""" - return cutout.convert_and_aggregate( + return cutout.convert_and_aggregate( # type: ignore[no-any-return] convert_func=convert_cooling_demand, threshold=threshold, a=a, @@ -647,8 +689,14 @@ def cooling_demand( # solar thermal collectors def convert_solar_thermal( - ds, orientation, trigon_model, clearsky_model, c0, c1, t_store -): + ds: Dataset, + orientation: Callable, + trigon_model: str, + clearsky_model: str | None, + c0: float, + c1: float, + t_store: float, +) -> DataArray: """ Convert weather data to solar thermal collector output. @@ -691,19 +739,19 @@ def convert_solar_thermal( output = irradiation * eta - return output.where(output > 0.0, 0.0) + return output.where(output > 0.0, 0.0) # type: ignore[no-any-return] def solar_thermal( - cutout, - orientation={"slope": 45.0, "azimuth": 180.0}, - trigon_model="simple", - clearsky_model="simple", - c0=0.8, - c1=3.0, - t_store=80.0, - **params, -): + cutout: Cutout, + orientation: dict[str, float] = {"slope": 45.0, "azimuth": 180.0}, + trigon_model: str = "simple", + clearsky_model: str = "simple", + c0: float = 0.8, + c1: float = 3.0, + t_store: float = 80.0, + **params: Any, +) -> DataArray | NumericArray: """ Convert downward short-wave radiation flux and outside temperature into time series for solar thermal collectors. 
@@ -738,9 +786,9 @@ def solar_thermal( """ if not callable(orientation): - orientation = get_orientation(orientation) + orientation = get_orientation(orientation) # type: ignore[assignment] - return cutout.convert_and_aggregate( + return cutout.convert_and_aggregate( # type: ignore[no-any-return] convert_func=convert_solar_thermal, orientation=orientation, trigon_model=trigon_model, @@ -795,16 +843,16 @@ def apply_power_curve(da): da.attrs["units"] = "MWh/MWp" da = da.rename("specific generation") - return da + return da # type: ignore[no-any-return] def wind( - cutout, - turbine: str | Path | dict, - smooth: bool | dict = False, + cutout: Cutout, + turbine: str | Path | dict[str, Any], + smooth: bool | dict[str, Any] = False, add_cutout_windspeed: bool = False, interpolation_method: Literal["logarithmic", "power"] = "logarithmic", - **params, + **params: Any, ) -> xr.DataArray: """ Generate wind generation time-series. @@ -867,14 +915,16 @@ def wind( 1074 – 1088. doi:10.1016/j.energy.2015.09.071 """ - turbine = get_windturbineconfig(turbine, add_cutout_windspeed=add_cutout_windspeed) + turbine_config = get_windturbineconfig( + turbine, add_cutout_windspeed=add_cutout_windspeed + ) if smooth: - turbine = windturbine_smooth(turbine, params=smooth) + turbine_config = windturbine_smooth(turbine_config, params=smooth) - return cutout.convert_and_aggregate( + return cutout.convert_and_aggregate( # type: ignore[no-any-return, return-value] convert_func=convert_wind, - turbine=turbine, + turbine=turbine_config, interpolation_method=interpolation_method, **params, ) @@ -1470,10 +1520,10 @@ def convert_line_rating( A = D * 1 # projected area of conductor in square meters if isinstance(ds, dict): - Position = namedtuple("solarposition", ["altitude", "azimuth"]) - solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) + Position = namedtuple("Position", ["altitude", "azimuth"]) + solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) # type: 
ignore[assignment] else: - solar_position = SolarPosition(ds) + solar_position = SolarPosition(ds) # type: ignore[assignment] Phi_s = arccos( cos(solar_position.altitude) * cos((solar_position.azimuth) - radians(psi)) ) @@ -1609,4 +1659,4 @@ def get_azimuth(shape): else: res = compute(res, **dask_kwargs) - return xr.concat(*res, dim=df.index).assign_attrs(units="A") + return xr.concat(*res, dim=df.index).assign_attrs(units="A") # type: ignore[call-overload] diff --git a/atlite/csp.py b/atlite/csp.py index df23a0ad..a590e06d 100644 --- a/atlite/csp.py +++ b/atlite/csp.py @@ -5,17 +5,34 @@ Functions for use in conjunction with csp data generation. """ +from __future__ import annotations + import logging +from typing import TYPE_CHECKING, Literal, TypeAlias import numpy as np +import xarray as xr from dask.array import radians, sin from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from dask.array import Array + logger = logging.getLogger(__name__) +NDArray: TypeAlias = np.ndarray +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset +CSPTechnology = Literal["parabolic trough", "solar tower"] +FieldOrientation = Literal["horizontal", "tilted", "single-axis", "two-axis"] -def calculate_dni(ds, solar_position=None, altitude_threshold=3.75): + +def calculate_dni( + ds: Dataset, + solar_position: Dataset | None = None, + altitude_threshold: float = 3.75, +) -> DataArray: """ Calculate DNI on a perpendicular plane. @@ -24,35 +41,34 @@ def calculate_dni(ds, solar_position=None, altitude_threshold=3.75): Parameters ---------- - ds : xarray.Dataset + ds : xr.Dataset Dataset containing the direct influx (influx_direct) into a horizontal plane. - solar_position : xarray.Dataset (optional) - solar_position containing a solar 'altitude' (in rad, 0 to pi/2) for the 'ds' dataset. - Is calculated using atlite.pv.SolarPosition if omitted. 
- altitude_threshold : float (default: 3.75 degrees) + solar_position : xr.Dataset | None + Dataset containing solar altitude (in rad, 0 to pi/2) for the input dataset. + Calculated using atlite.pv.SolarPosition if not provided. + altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] - will be set to the altitude_threshold to avoid numerical issues when dividing by - the sine of very low solar altitude. - The default values '3.75 deg' corresponds to - the solar altitude traversed by the sun within about 15 minutes in a location with - maximum solar altitude of 60 deg and 10h day time. + are set to altitude_threshold to prevent numerical issues when dividing by + the sine of very low solar altitude. Default: 3.75 degrees corresponds to + approximately 15 minutes of solar movement at 60 deg maximum altitude. + + Returns + ------- + xr.DataArray + Direct Normal Irradiance (DNI) in W/m^2 on a plane perpendicular to solar rays. """ if solar_position is None: solar_position = SolarPosition(ds) - # solar altitude expected in rad, convert degrees (easier to specifcy) to match - altitude_threshold = radians(altitude_threshold) + altitude_threshold_rad: Array = radians(altitude_threshold) - # Sanitation of altitude values: - # Prevent high calculated DNI values during low solar altitudes (sunset / dawn) - # where sin() results in a very low denominator in the DNI calculation - altitude = solar_position["altitude"] + altitude: DataArray = solar_position["altitude"] altitude = altitude.where(lambda x: x > 0, np.nan) - altitude = altitude.where(lambda x: x > altitude_threshold, altitude_threshold) + altitude = altitude.where( + lambda x: x > altitude_threshold_rad, altitude_threshold_rad + ) - # Calculate DNI and remove NaNs introduced during altitude sanitation - # DNI is determined either by dividing by cos(azimuth) or sin(altitude) - dni = ds["influx_direct"] / sin(altitude) + dni: DataArray = ds["influx_direct"] / 
sin(altitude) return dni diff --git a/atlite/cutout.py b/atlite/cutout.py index db747869..e06c50e8 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -14,9 +14,13 @@ # https://github.com/pydata/xarray/issues/2535, # https://github.com/rasterio/rasterio-wheels/issues/12 +from __future__ import annotations + import logging +from collections.abc import Sequence from pathlib import Path from tempfile import mktemp +from typing import TYPE_CHECKING, Any from warnings import warn import geopandas as gpd @@ -28,6 +32,20 @@ from pyproj import CRS from shapely.geometry import box +from atlite._types import ( + CrsLike, + DataArray, + GeoDataFrame, + Geometry, + NDArray, + Number, + PathLike, + SparseMatrix, +) + +if TYPE_CHECKING: + pass + from atlite.convert import ( coefficient_of_performance, convert_and_aggregate, @@ -66,7 +84,7 @@ class Cutout: functionalities. """ - def __init__(self, path, **cutoutparams): + def __init__(self, path: PathLike, **cutoutparams: Any) -> None: """ Provide an atlite cutout object. @@ -201,7 +219,7 @@ def __init__(self, path, **cutoutparams): data = xr.Dataset(coords=coords, attrs=attrs) # Check compatibility of CRS - modules = atleast_1d(data.attrs.get("module")) + modules = atleast_1d(data.attrs.get("module")) # type: ignore[arg-type] crs = set(CRS(datamodules[m].crs) for m in modules) assert len(crs) == 1, f"CRS of {module} not compatible" @@ -209,35 +227,35 @@ def __init__(self, path, **cutoutparams): self.data = data @property - def name(self): + def name(self) -> str: """ Name of the cutout. """ return self.path.stem @property - def module(self): + def module(self) -> str | list[str]: """ Data module of the cutout. """ - return self.data.attrs.get("module") + return self.data.attrs.get("module") # type: ignore[no-any-return, return-value] @property - def crs(self): + def crs(self) -> CRS: """ Coordinate Reference System of the cutout. 
""" return CRS(datamodules[atleast_1d(self.module)[0]].crs) @property - def available_features(self): + def available_features(self) -> pd.Index: """ List of available weather data features for the cutout. """ return available_features(self.module) @property - def chunks(self): + def chunks(self) -> dict[str, int] | None: """ Chunking of the cutout data used by dask. """ @@ -249,21 +267,21 @@ def chunks(self): return None if chunks == {} else chunks @property - def coords(self): + def coords(self) -> xr.Coordinates: """ Geographic coordinates of the cutout. """ return self.data.coords @property - def shape(self): + def shape(self) -> tuple[int, int]: """ Size of spatial dimensions (y, x) of the cutout data. """ return len(self.coords["y"]), len(self.coords["x"]) @property - def extent(self): + def extent(self) -> NDArray: """ Total extent of the area covered by the cutout (x, X, y, Y). """ @@ -274,14 +292,14 @@ def extent(self): ) @property - def bounds(self): + def bounds(self) -> NDArray: """ Total bounds of the area covered by the cutout (x, y, X, Y). """ return self.extent[[0, 2, 1, 3]] @property - def transform(self): + def transform(self) -> rio.Affine: """ Get the affine transform of the cutout. """ @@ -295,7 +313,7 @@ def transform(self): ) @property - def transform_r(self): + def transform_r(self) -> rio.Affine: """ Get the affine transform of the cutout with reverse y-order. """ @@ -309,39 +327,39 @@ def transform_r(self): ) @property - def dx(self): + def dx(self) -> float: """ Spatial resolution on the x coordinates. """ x = self.coords["x"] - return round((x[-1] - x[0]).item() / (x.size - 1), 8) + return round((x[-1] - x[0]).item() / (x.size - 1), 8) # type: ignore[no-any-return] @property - def dy(self): + def dy(self) -> float: """ Spatial resolution on the y coordinates. 
""" y = self.coords["y"] - return round((y[-1] - y[0]).item() / (y.size - 1), 8) + return round((y[-1] - y[0]).item() / (y.size - 1), 8) # type: ignore[no-any-return] @property - def dt(self): + def dt(self) -> str | None: """ Time resolution of the cutout. """ - return pd.infer_freq(self.coords["time"].to_index()) + return pd.infer_freq(self.coords["time"].to_index()) # type: ignore[no-any-return] @property - def prepared(self): + def prepared(self) -> bool: """ Boolean indicating whether all available features are prepared. """ - return self.prepared_features.sort_index().equals( + return self.prepared_features.sort_index().equals( # type: ignore[no-any-return] self.available_features.sort_index() ) @property - def prepared_features(self): + def prepared_features(self) -> pd.Series[Any]: """ Get the list of prepared features in the cutout. """ @@ -353,7 +371,7 @@ def prepared_features(self): return pd.Series(list(self.data), index, dtype=object) @CachedAttribute - def grid(self): + def grid(self) -> GeoDataFrame: """ Cutout grid with coordinates and geometries. @@ -375,7 +393,13 @@ def grid(self): crs=self.crs, ) - def sel(self, path=None, bounds=None, buffer=0, **kwargs): + def sel( + self, + path: PathLike | None = None, + bounds: Sequence[float] | None = None, + buffer: float = 0, + **kwargs: Any, + ) -> Cutout: """ Select parts of the cutout. @@ -406,13 +430,15 @@ def sel(self, path=None, bounds=None, buffer=0, **kwargs): if bounds is not None: if buffer > 0: - bounds = box(*bounds).buffer(buffer).bounds - x1, y1, x2, y2 = bounds + bounds = box(*bounds).buffer(buffer).bounds # type: ignore[assignment] + x1, y1, x2, y2 = bounds # type: ignore[misc] kwargs.update(x=slice(x1, x2), y=slice(y1, y2)) data = self.data.sel(**kwargs) return Cutout(path, data=data) - def merge(self, other, path=None, **kwargs): + def merge( + self, other: Cutout, path: PathLike | None = None, **kwargs: Any + ) -> Cutout: """ Merge two cutouts into a single cutout. 
@@ -450,7 +476,7 @@ def merge(self, other, path=None, **kwargs): return Cutout(path, data=data) - def to_file(self, fn=None): + def to_file(self, fn: PathLike | None = None) -> None: """ Save cutout to a NetCDF file. @@ -464,7 +490,7 @@ def to_file(self, fn=None): fn = self.path self.data.to_netcdf(fn) - def __repr__(self): + def __repr__(self) -> str: start = np.datetime_as_string(self.coords["time"].values[0], unit="D") end = np.datetime_as_string(self.coords["time"].values[-1], unit="D") return ( @@ -489,7 +515,9 @@ def __repr__(self): ) ) - def indicatormatrix(self, shapes, shapes_crs=4326): + def indicatormatrix( + self, shapes: Sequence[Geometry], shapes_crs: CrsLike = 4326 + ) -> SparseMatrix: """ Compute the indicatormatrix. @@ -514,7 +542,9 @@ def indicatormatrix(self, shapes, shapes_crs=4326): """ return compute_indicatormatrix(self.grid, shapes, self.crs, shapes_crs) - def intersectionmatrix(self, shapes, shapes_crs=4326): + def intersectionmatrix( + self, shapes: Sequence[Geometry], shapes_crs: CrsLike = 4326 + ) -> SparseMatrix: """ Compute the intersectionmatrix. @@ -536,7 +566,7 @@ def intersectionmatrix(self, shapes, shapes_crs=4326): """ return compute_intersectionmatrix(self.grid, shapes, self.crs, shapes_crs) - def area(self, crs=None): + def area(self, crs: CrsLike = None) -> DataArray: """ Get the area per grid cell as a DataArray with coords (x,y). @@ -561,13 +591,15 @@ def area(self, crs=None): [self.coords["y"], self.coords["x"]], ) - def uniform_layout(self): + def uniform_layout(self) -> DataArray: """ Get a uniform capacity layout for all grid cells. """ return xr.DataArray(1, [self.coords["y"], self.coords["x"]]) - def uniform_density_layout(self, capacity_density, crs=None): + def uniform_density_layout( + self, capacity_density: Number, crs: CrsLike = None + ) -> DataArray: """ Get a capacity layout from a uniform capacity density. 
@@ -586,16 +618,15 @@ def uniform_density_layout(self, capacity_density, crs=None): capacity placed within one grid cell. """ - return capacity_density * self.area(crs) + return capacity_density * self.area(crs) # type: ignore[no-any-return] - def equals(self, other): + def equals(self, other: Any) -> bool: """ It overrides xarray.Dataset.equals and ignores the path attribute in the comparison """ if not isinstance(other, Cutout): - return NotImplemented - # Compare cutouts data attributes - return self.data.equals(other.data) + return NotImplemented # type: ignore[no-any-return] + return self.data.equals(other.data) # type: ignore[no-any-return] def layout_from_capacity_list(self, data, col="Capacity"): """ diff --git a/atlite/data.py b/atlite/data.py index 769d65ad..caf7538f 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -5,34 +5,46 @@ Management of data retrieval and structure. """ +from __future__ import annotations + import logging import os +from collections.abc import Callable, Iterable from functools import wraps from pathlib import Path from shutil import rmtree from tempfile import mkdtemp, mkstemp +from typing import TYPE_CHECKING, Any -import pandas as pd +import numpy as np +import pandas as pd # type: ignore[import-untyped] import xarray as xr -from dask import compute, delayed -from dask.diagnostics import ProgressBar +from dask import compute as dask_compute # type: ignore[attr-defined] +from dask import delayed +from dask.diagnostics import ProgressBar # type: ignore[attr-defined] from dask.utils import SerializableLock from numpy import atleast_1d +from atlite._types import DataArray, Dataset, PathLike from atlite.datasets import modules as datamodules +if TYPE_CHECKING: + from collections.abc import Sequence + + from atlite.cutout import Cutout + logger = logging.getLogger(__name__) def get_features( - cutout, - module, - features, - data_format, - tmpdir=None, - monthly_requests=False, - concurrent_requests=False, -): + cutout: Cutout, + 
module: str, + features: Iterable[str], + data_format: str, + tmpdir: PathLike | None = None, + monthly_requests: bool = False, + concurrent_requests: bool = False, +) -> Dataset: """ Load feature datasets for a cutout module. @@ -58,13 +70,13 @@ def get_features( xarray.Dataset Merged dataset containing the requested features. """ - parameters = cutout.data.attrs - lock = SerializableLock() - datasets = [] - get_data = datamodules[module].get_data + parameters: dict[str, Any] = cutout.data.attrs + lock: SerializableLock = SerializableLock() + datasets: list[Any] = [] + get_data: Callable[..., Any] = datamodules[module].get_data for feature in features: - feature_data = delayed(get_data)( + feature_data: Any = delayed(get_data)( cutout, feature, tmpdir=tmpdir, @@ -76,23 +88,23 @@ def get_features( ) datasets.append(feature_data) - datasets = compute(*datasets) + datasets = dask_compute(*datasets) # type: ignore[no-untyped-call] - ds = xr.merge(datasets, compat="equals") + ds: Dataset = xr.merge(datasets, compat="equals") for v in ds: - da = ds[v] + da: DataArray = ds[v] da.attrs["module"] = module - fd = datamodules[module].features.items() + fd: Iterable[tuple[str, Any]] = datamodules[module].features.items() da.attrs["feature"] = [k for k, l in fd if v in l].pop() if da.chunks is not None: - chunksizes = [c[0] for c in da.chunks] + chunksizes: list[int] = [c[0] for c in da.chunks] da.encoding["chunksizes"] = chunksizes return ds -def available_features(module=None): +def available_features(module: str | Sequence[str] | None = None) -> pd.Series[str]: """ Inspect the available features of all or a selection of modules. @@ -110,8 +122,8 @@ def available_features(module=None): obtained. 
""" - features = {name: m.features for name, m in datamodules.items()} - features = ( + features: dict[str, Any] = {name: m.features for name, m in datamodules.items()} + features_frame: pd.Series[Any] = ( pd.DataFrame(features) .unstack() .dropna() @@ -119,11 +131,11 @@ def available_features(module=None): .rename("variables") ) if module is not None: - features = features.reindex(atleast_1d(module), level="module") - return features.explode() + features_frame = features_frame.reindex(atleast_1d(module), level="module") + return features_frame.explode() -def non_bool_dict(d): +def non_bool_dict(d: dict[str, Any]) -> dict[str, Any]: """ Convert boolean dictionary values to integers. @@ -140,7 +152,9 @@ def non_bool_dict(d): return {k: v if not isinstance(v, bool) else int(v) for k, v in d.items()} -def maybe_remove_tmpdir(func): +def maybe_remove_tmpdir( + func: Callable[..., Any], +) -> Callable[..., Any]: """ Wrap a function to manage a temporary directory. @@ -157,9 +171,9 @@ def maybe_remove_tmpdir(func): """ @wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any) -> Any: if kwargs.get("tmpdir", None): - res = func(*args, **kwargs) + res: Any = func(*args, **kwargs) else: kwargs["tmpdir"] = mkdtemp() try: @@ -173,17 +187,17 @@ def wrapper(*args, **kwargs): @maybe_remove_tmpdir def cutout_prepare( - cutout, - features=None, - tmpdir=None, - data_format="grib", - overwrite=False, - compression={"zlib": True, "complevel": 9, "shuffle": True}, - show_progress=False, - dask_kwargs=None, - monthly_requests=False, - concurrent_requests=False, -): + cutout: Cutout, + features: str | Sequence[str] | None = None, + tmpdir: PathLike | None = None, + data_format: str = "grib", + overwrite: bool = False, + compression: dict[str, Any] | None = None, + show_progress: bool = False, + dask_kwargs: dict[str, Any] | None = None, + monthly_requests: bool = False, + concurrent_requests: bool = False, +) -> Cutout: """ Prepare all or a selection of 
features in a cutout. @@ -244,65 +258,74 @@ def cutout_prepare( if dask_kwargs is None: dask_kwargs = {} + if compression is None: + compression = {"zlib": True, "complevel": 9, "shuffle": True} + if cutout.prepared and not overwrite: logger.info("Cutout already prepared.") return cutout - # ensure that the tmpdir actually exists - temp_dir_path = Path(tmpdir) + if tmpdir is None: + raise ValueError("tmpdir cannot be None") + temp_dir_path: Path = Path(tmpdir) if not temp_dir_path.is_dir(): raise FileNotFoundError(f"The tmpdir: {temp_dir_path} does not exist.") logger.info(f"Storing temporary files in {tmpdir}") - modules = atleast_1d(cutout.module) - features = atleast_1d(features) if features else slice(None) - prepared = set(atleast_1d(cutout.data.attrs["prepared_features"])) + modules_array: np.ndarray[Any, np.dtype[Any]] = atleast_1d(cutout.module) + modules_list: list[str] = modules_array.tolist() + features_normalized: np.ndarray[Any, np.dtype[Any]] | slice = ( + atleast_1d(features) if features else slice(None) + ) + prepared: set[str] = set(atleast_1d(cutout.data.attrs["prepared_features"])) - # target is series of all available variables for given module and features - target = available_features(modules).loc[:, features].drop_duplicates() + target: pd.Series[str] = ( + available_features(modules_list).loc[:, features_normalized].drop_duplicates() + ) for module in target.index.unique("module"): - missing_vars = target[module] + missing_vars: pd.Series[str] = target[module] if not overwrite: missing_vars = missing_vars[lambda v: ~v.isin(cutout.data)] if missing_vars.empty: continue logger.info(f"Calculating and writing with module {module}:") - missing_features = missing_vars.index.unique("feature") - ds = get_features( + missing_features: np.ndarray[Any, np.dtype[Any]] = missing_vars.index.unique( + "feature" + ) + ds: Dataset = get_features( cutout, module, missing_features, - tmpdir=tmpdir, data_format=data_format, + tmpdir=tmpdir, 
monthly_requests=monthly_requests, concurrent_requests=concurrent_requests, ) prepared |= set(missing_features) cutout.data.attrs.update(dict(prepared_features=list(prepared))) - attrs = non_bool_dict(cutout.data.attrs) + attrs: dict[str, Any] = non_bool_dict(cutout.data.attrs) attrs.update(ds.attrs) - # Add optional compression to the newly prepared features if compression: for v in missing_vars: ds[v].encoding.update(compression) ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs) - # write data to tmp file, copy it to original data, this is much safer - # than appending variables + directory: str + filename: str directory, filename = os.path.split(str(cutout.path)) + fd: int + tmp: str fd, tmp = mkstemp(suffix=filename, dir=directory) os.close(fd) logger.debug("Writing cutout to file...") - # Delayed writing for large cutout - # cf. https://stackoverflow.com/questions/69810367/python-how-to-write-large-netcdf-with-xarray - write_job = ds.to_netcdf(tmp, compute=False) + write_job: Any = ds.to_netcdf(tmp, compute=False) if show_progress: - with ProgressBar(minimum=2): + with ProgressBar(minimum=2): # type: ignore[no-untyped-call] write_job.compute(**dask_kwargs) else: write_job.compute(**dask_kwargs) diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py index 045c59d8..39e11a03 100644 --- a/atlite/datasets/__init__.py +++ b/atlite/datasets/__init__.py @@ -6,6 +6,10 @@ atlite datasets. """ +from __future__ import annotations + +from types import ModuleType + from atlite.datasets import era5, gebco, sarah -modules = {"era5": era5, "sarah": sarah, "gebco": gebco} +modules: dict[str, ModuleType] = {"era5": era5, "sarah": sarah, "gebco": gebco} diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 2c70828e..f759ccef 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -12,14 +12,21 @@ for the time being! 
""" +from __future__ import annotations + import glob import os +from collections.abc import Generator from itertools import groupby from operator import itemgetter +from typing import Any +import numpy as np import pandas as pd import xarray as xr +from atlite._types import PathLike + # Model and CRS Settings model = "MPI-M-MPI-ESM-LR" @@ -29,69 +36,35 @@ # RotProj(dict(proj='ob_tran', o_proj='latlong', lon_0=180, o_lon_p=-162, o_lat_p=39.25)) -def rename_and_clean_coords(ds): - """ - Rename CORDEX grid coordinates and drop unused metadata. - - Parameters - ---------- - ds : xarray.Dataset - Input CORDEX dataset. - - Returns - ------- - xarray.Dataset - Dataset with ``rlon`` and ``rlat`` renamed to ``x`` and ``y`` and - unused coordinates or variables removed. - """ +def rename_and_clean_coords(ds: xr.Dataset) -> xr.Dataset: ds = ds.rename({"rlon": "x", "rlat": "y"}) - # drop some coordinates and variables we do not use ds = ds.drop( (set(ds.coords) | set(ds.data_vars)) & {"bnds", "height", "rotated_pole"} ) return ds -def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): - """ - Prepare time-varying CORDEX data for selected months. - - Parameters - ---------- - fn : str or path-like - Source file path. - year : int - Target year. - months : list[int] - Months to extract from the file. - oldname : str - Original variable name in the source dataset. - newname : str - Target variable name. - xs : slice or array-like - X-coordinate selection. - ys : slice or array-like - Y-coordinate selection. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - Each requested year-month together with the prepared dataset slice. 
- """ +def prepare_data_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) ds = ds.sel(x=xs, y=ys) if newname in {"influx", "outflux"}: - # shift averaged data to beginning of bin ds = ds.assign_coords( time=( pd.to_datetime(ds.coords["time"].values) - pd.Timedelta(hours=1.5) ) ) elif newname in {"runoff"}: - # shift and fill 6hr average data to beginning of 3hr bins t = pd.to_datetime(ds.coords["time"].values) ds = ds.reindex(method="bfill", time=(t - pd.Timedelta(hours=3.0)).union(t)) @@ -99,32 +72,15 @@ def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): yield (year, m), ds.sel(time=f"{year}-{m}") -def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): - """ - Prepare static CORDEX data for selected months. - - Parameters - ---------- - fn : str or path-like - Source file path. - year : int - Target year. - months : list[int] - Months to associate with the static data. - oldname : str - Original variable name in the source dataset. - newname : str - Target variable name. - xs : slice or array-like - X-coordinate selection. - ys : slice or array-like - Y-coordinate selection. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - Each requested year-month together with the static dataset. 
- """ +def prepare_static_data_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -134,31 +90,15 @@ def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): yield (year, m), ds -def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): - """ - Prepare monthly CORDEX weather type slices. - - Parameters - ---------- - fn : str or path-like - Source file path. - year : int - Target year. - months : list[int] - Months to extract from the file. - oldname : str - Original variable name in the source dataset. - newname : str - Target variable name. - xs, ys : slice or array-like - Unused placeholders kept for API compatibility with other CORDEX - preparation helpers. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - Each requested year-month together with the renamed dataset slice. - """ +def prepare_weather_types_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn) as ds: ds = ds.rename({oldname: newname}) for m in months: @@ -166,35 +106,15 @@ def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): def prepare_meta_cordex( - xs, ys, year, month, template, height_config, module, model=model -): - """ - Prepare CORDEX metadata for a cutout month. - - Parameters - ---------- - xs : slice or array-like - X-coordinate selection. - ys : slice or array-like - Y-coordinate selection. - year : int - Target year. - month : int - Target month. - template : str - File name template for the reference dataset. 
- height_config : dict - Height dataset configuration. - module : module - Dataset module namespace. - model : str, default ``model`` - CORDEX model identifier. - - Returns - ------- - xarray.Dataset - Metadata dataset with spatial, temporal, and height information. - """ + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + year: int, + month: int, + template: str, + height_config: dict[str, Any], + module: Any, + model: str = "MPI-M-MPI-ESM-LR", +) -> xr.Dataset: fn = next(glob.iglob(template.format(year=year, model=model))) with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) @@ -214,46 +134,26 @@ def prepare_meta_cordex( meta["height"] = ds["height"] - return meta + return meta # type: ignore[no-any-return] def tasks_yearly_cordex( - xs, ys, yearmonths, prepare_func, template, oldname, newname, meta_attrs -): - """ - Create yearly CORDEX preparation task specifications. - - Parameters - ---------- - xs : slice or array-like - X-coordinate selection. - ys : slice or array-like - Y-coordinate selection. - yearmonths : list[tuple[int, int]] - Requested year-month pairs. - prepare_func : callable - Preparation function to execute for each task. - template : str - File name template for yearly input files. - oldname : str - Original variable name in the source dataset. - newname : str - Target variable name. - meta_attrs : dict - Metadata attributes containing the CORDEX model identifier. - - Returns - ------- - list[dict] - Task dictionaries grouped by year. 
- """ + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + oldname: str, + newname: str, + meta_attrs: dict[str, Any], +) -> list[dict[str, Any]]: model = meta_attrs["model"] if not isinstance(xs, slice): - first, second, last = xs.values[[0, 1, -1]] + first, second, last = xs.values[[0, 1, -1]] # type: ignore[attr-defined] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) if not isinstance(ys, slice): - first, second, last = ys.values[[0, 1, -1]] + first, second, last = ys.values[[0, 1, -1]] # type: ignore[attr-defined] ys = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) return [ @@ -271,124 +171,136 @@ def tasks_yearly_cordex( ] -weather_data_config = { - "influx": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsds", - newname="influx", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "influx", - "rsds_*_{year}*.nc", +weather_data_config: dict[str, dict[str, Any]] = {} +try: + from atlite import config # type: ignore[attr-defined] # noqa: F401 + + weather_data_config = { + "influx": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_data_cordex, + oldname="rsds", + newname="influx", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "influx", + "rsds_*_{year}*.nc", + ), ), - ), - "outflux": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsus", - newname="outflux", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "outflux", - "rsus_*_{year}*.nc", + "outflux": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_data_cordex, + oldname="rsus", + newname="outflux", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "outflux", + "rsus_*_{year}*.nc", + ), ), - ), - "temperature": dict( - tasks_func=tasks_yearly_cordex, - 
prepare_func=prepare_data_cordex, - oldname="tas", - newname="temperature", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "temperature", - "tas_*_{year}*.nc", + "temperature": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_data_cordex, + oldname="tas", + newname="temperature", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "temperature", + "tas_*_{year}*.nc", + ), ), - ), - "humidity": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="hurs", - newname="humidity", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "humidity", - "hurs_*_{year}*.nc", + "humidity": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_data_cordex, + oldname="hurs", + newname="humidity", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "humidity", + "hurs_*_{year}*.nc", + ), ), - ), - "wnd10m": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="sfcWind", - newname="wnd10m", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "wind", - "sfcWind_*_{year}*.nc", + "wnd10m": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_data_cordex, + oldname="sfcWind", + newname="wnd10m", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "wind", + "sfcWind_*_{year}*.nc", + ), ), - ), - "roughness": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="rlst", - newname="roughness", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "roughness", - "rlst_*.nc", + "roughness": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_static_data_cordex, + oldname="rlst", + newname="roughness", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "roughness", + "rlst_*.nc", + ), ), - ), - "runoff": dict( - tasks_func=tasks_yearly_cordex, - 
prepare_func=prepare_data_cordex, - oldname="mrro", - newname="runoff", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "runoff", - "mrro_*_{year}*.nc", + "runoff": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_data_cordex, + oldname="mrro", + newname="runoff", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "runoff", + "mrro_*_{year}*.nc", + ), ), - ), - "height": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="orog", - newname="height", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "altitude", - "orog_*.nc", + "height": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_static_data_cordex, + oldname="orog", + newname="height", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "altitude", + "orog_*.nc", + ), ), - ), - "CWT": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_weather_types_cordex, - oldname="CWT", - newname="CWT", + "CWT": dict( + tasks_func=tasks_yearly_cordex, + prepare_func=prepare_weather_types_cordex, + oldname="CWT", + newname="CWT", + template=os.path.join( + config.cordex_dir, # noqa: F821 + "{model}", + "weather_types", + "CWT_*_{year}*.nc", + ), + ), + } +except ImportError: + pass + +meta_data_config: dict[str, Any] = {} +try: + from atlite import config # type: ignore[attr-defined] # noqa: F401 + + meta_data_config = dict( + prepare_func=prepare_meta_cordex, template=os.path.join( config.cordex_dir, # noqa: F821 "{model}", - "weather_types", - "CWT_*_{year}*.nc", + "temperature", + "tas_*_{year}*.nc", ), - ), -} - -meta_data_config = dict( - prepare_func=prepare_meta_cordex, - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "temperature", - "tas_*_{year}*.nc", - ), - height_config=weather_data_config["height"], -) + height_config=weather_data_config["height"], + ) +except (ImportError, KeyError): + pass diff --git 
a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 26363636..4dde6800 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -8,12 +8,15 @@ https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation """ +from __future__ import annotations + import logging import os import warnings import weakref -from pathlib import Path +from collections.abc import Callable from tempfile import mkstemp +from typing import Any, Literal import cdsapi import numpy as np @@ -24,6 +27,7 @@ from dask.utils import SerializableLock from numpy import atleast_1d +from atlite._types import ERA5RetrievalParams, PathLike from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition @@ -34,8 +38,8 @@ # for Python verions < 3.7: import contextlib - @contextlib.contextmanager - def nullcontext(): + @contextlib.contextmanager # type: ignore[no-redef] + def nullcontext(): # type: ignore[misc] yield @@ -62,16 +66,7 @@ def nullcontext(): static_features = {"height"} -def _add_height(ds): - """ - Convert geopotential 'z' to geopotential height following [1]. - - References - ---------- - [1] ERA5: surface elevation and orography, retrieved: 10.02.2019 - https://confluence.ecmwf.int/display/CKB/ERA5%3A+surface+elevation+and+orography - - """ +def _add_height(ds: xr.Dataset) -> xr.Dataset: g0 = 9.80665 z = ds["z"] if "time" in z.coords: @@ -81,40 +76,19 @@ def _add_height(ds): return ds -def _rename_and_clean_coords(ds, add_lon_lat=True): - """ - Rename 'longitude' and 'latitude' columns to 'x' and 'y' and fix roundings. - - Optionally (add_lon_lat, default:True) preserves latitude and - longitude columns as 'lat' and 'lon'. 
- """ +def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dataset: ds = ds.rename({"longitude": "x", "latitude": "y", "valid_time": "time"}) - # round coords since cds coords are float32 which would lead to mismatches ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) ) - ds = maybe_swap_spatial_dims(ds) + ds = maybe_swap_spatial_dims(ds) # type: ignore[assignment] if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.drop_vars(["expver", "number"], errors="ignore") + return ds # type: ignore[return-value] - return ds - - -def get_data_wind(retrieval_params): - """ - Retrieve and derive wind variables from ERA5 data. - - Parameters - ---------- - retrieval_params : dict - Parameters passed to :func:`retrieve_data`. - Returns - ------- - xarray.Dataset - Dataset containing wind speed, shear exponent, azimuth, and roughness. - """ +def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data( variable=[ "10m_u_component_of_wind", @@ -135,49 +109,21 @@ def get_data_wind(retrieval_params): np.log(ds["wnd10m"] / ds["wnd100m"]) / np.log(10 / 100) ).assign_attrs(units="", long_name="wind shear exponent") - # span the whole circle: 0 is north, π/2 is east, -π is south, 3π/2 is west azimuth = arctan2(ds["u100"], ds["v100"]) ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"]) ds = ds.rename({"fsr": "roughness"}) - return ds - + return ds # type: ignore[no-any-return] -def sanitize_wind(ds): - """ - Sanitize wind data variables. - Parameters - ---------- - ds : xarray.Dataset - Retrieved wind dataset. - - Returns - ------- - xarray.Dataset - Dataset with non-physical roughness values replaced. 
- """ +def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: ds["roughness"] = ds["roughness"].where(ds["roughness"] >= 0.0, 2e-4) return ds -def get_data_influx(retrieval_params): - """ - Retrieve and derive solar influx variables from ERA5 data. - - Parameters - ---------- - retrieval_params : dict - Parameters passed to :func:`retrieve_data`. - - Returns - ------- - xarray.Dataset - Dataset containing direct, diffuse, and top-of-atmosphere influx, - albedo, and solar position variables. - """ +def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data( variable=[ "surface_net_solar_radiation", @@ -201,15 +147,10 @@ def get_data_influx(retrieval_params): ) ds = ds.drop_vars(["ssrd", "ssr"]) - # Convert from energy to power J m**-2 -> W m**-2 and clip negative fluxes for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a] / (60.0 * 60.0) ds[a].attrs["units"] = "W m**-2" - # ERA5 variables are mean values for previous hour, i.e. 13:01 to 14:00 are labelled as "14:00" - # account by calculating the SolarPosition for the center of the interval for aggregation happens - # see https://github.com/PyPSA/atlite/issues/158 - # Do not show DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) time_shift = pd.to_timedelta("-30 minutes") @@ -218,42 +159,16 @@ def get_data_influx(retrieval_params): ds = xr.merge([ds, sp]) - return ds - + return ds # type: ignore[no-any-return] -def sanitize_influx(ds): - """ - Sanitize solar influx data. - Parameters - ---------- - ds : xarray.Dataset - Retrieved influx dataset. - - Returns - ------- - xarray.Dataset - Dataset with negative influx values clipped to zero. 
- """ +def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a].clip(min=0.0) return ds -def get_data_temperature(retrieval_params): - """ - Retrieve temperature-related ERA5 variables. - - Parameters - ---------- - retrieval_params : dict - Parameters passed to :func:`retrieve_data`. - - Returns - ------- - xarray.Dataset - Dataset containing air, soil, and dewpoint temperature variables. - """ +def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data( variable=[ "2m_temperature", @@ -272,63 +187,24 @@ def get_data_temperature(retrieval_params): } ) - return ds - + return ds # type: ignore[no-any-return] -def get_data_runoff(retrieval_params): - """ - Retrieve runoff data from ERA5. - Parameters - ---------- - retrieval_params : dict - Parameters passed to :func:`retrieve_data`. - - Returns - ------- - xarray.Dataset - Dataset containing runoff values. - """ +def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) ds = ds.rename({"ro": "runoff"}) - return ds - + return ds # type: ignore[no-any-return] -def sanitize_runoff(ds): - """ - Sanitize runoff data. - Parameters - ---------- - ds : xarray.Dataset - Retrieved runoff dataset. - - Returns - ------- - xarray.Dataset - Dataset with negative runoff values clipped to zero. - """ +def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: ds["runoff"] = ds["runoff"].clip(min=0.0) return ds -def get_data_height(retrieval_params): - """ - Retrieve geopotential height data from ERA5. - - Parameters - ---------- - retrieval_params : dict - Parameters passed to :func:`retrieve_data`. - - Returns - ------- - xarray.Dataset - Dataset containing surface height derived from geopotential. 
- """ +def get_data_height(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data(variable="geopotential", **retrieval_params) ds = _rename_and_clean_coords(ds) @@ -337,37 +213,17 @@ def get_data_height(retrieval_params): return ds -def _area(coords): - # North, West, South, East. Default: global +def _area(coords: dict[str, xr.DataArray]) -> list[float]: x0, x1 = coords["x"].min().item(), coords["x"].max().item() y0, y1 = coords["y"].min().item(), coords["y"].max().item() return [y1, x0, y0, x1] -def retrieval_times(coords, static=False, monthly_requests=False): - """ - Get list of retrieval cdsapi arguments for time dimension in coordinates. - - If static is False, this function creates a query for each month and year - in the time axis in coords. This ensures not running into size query limits - of the cdsapi even with very (spatially) large cutouts. - If static is True, the function return only one set of parameters - for the very first time point. - - Parameters - ---------- - coords : atlite.Cutout.coords - static : bool, optional - monthly_requests : bool, optional - If True, the data is requested on a monthly basis. This is useful for - large cutouts, where the data is requested in smaller chunks. 
The - default is False - - Returns - ------- - list of dicts witht retrieval arguments - - """ +def retrieval_times( + coords: dict[str, xr.DataArray], + static: bool = False, + monthly_requests: bool = False, +) -> dict[str, Any] | list[dict[str, Any]]: time = coords["time"].to_index() if static: return { @@ -377,8 +233,7 @@ def retrieval_times(coords, static=False, monthly_requests=False): "time": time[0].strftime("%H:00"), } - # Prepare request for all months and years - times = [] + times: list[dict[str, Any]] = [] for year in time.year.unique(): t = time[time.year == year] if monthly_requests: @@ -401,19 +256,7 @@ def retrieval_times(coords, static=False, monthly_requests=False): return times -def noisy_unlink(path): - """ - Delete a file and log failures. - - Parameters - ---------- - path : str | Path - File path to delete. - - Returns - ------- - None - """ +def noisy_unlink(path: PathLike) -> None: logger.debug(f"Deleting file {path}") try: os.unlink(path) @@ -421,45 +264,14 @@ def noisy_unlink(path): logger.error(f"Unable to delete file {path}, as it is still in use.") -def add_finalizer(ds: xr.Dataset, target: str | Path): - """ - Register deletion of a temporary file when a dataset is closed. +def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: + logger.debug(f"Adding finalizer for {target}") + weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) # type: ignore[union-attr] - Parameters - ---------- - ds : xarray.Dataset - Dataset associated with the temporary file. - target : str | Path - Path to the temporary file. - Returns - ------- - None - """ - logger.debug(f"Adding finalizer for {target}") - weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) - - -def sanitize_chunks(chunks, **dim_mapping): - """ - Map internal chunk dimension names to ERA5 dataset dimensions. - - Parameters - ---------- - chunks : dict or Any - Chunk specification passed to xarray. 
- **dim_mapping - Additional mappings from internal to external dimension names. - - Returns - ------- - dict or Any - Chunk mapping with renamed dimensions, or the original value if no - mapping is needed. - """ +def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: dim_mapping = dict(time="valid_time", x="longitude", y="latitude") | dim_mapping if not isinstance(chunks, dict): - # preserve "auto" or None return chunks return { @@ -470,38 +282,15 @@ def sanitize_chunks(chunks, **dim_mapping): def open_with_grib_conventions( - grib_file: str | Path, chunks=None, tmpdir: str | Path | None = None + grib_file: PathLike, + chunks: dict[str, int] | None = None, + tmpdir: PathLike | None = None, ) -> xr.Dataset: - """ - Open a CDS GRIB file using ERA5-compatible conventions. - - Parameters - ---------- - grib_file : str | Path - Path to the GRIB file. - chunks : dict, optional - Chunk specification passed to xarray. - tmpdir : str | Path, optional - Temporary directory. If ``None``, the source file is removed when the - dataset is closed. - - Returns - ------- - xarray.Dataset - Dataset with renamed variables and expanded dimensions matching CDS - netCDF conventions. - """ - # - # Open grib file as dataset - # Options to open different datasets into a datasets of consistent hypercubes which are compatible netCDF - # There are options that might be relevant for e.g. for wave model data, that have been removed here - # to keep the code cleaner and shorter ds = xr.open_dataset( grib_file, engine="cfgrib", time_dims=["valid_time"], ignore_keys=["edition"], - # extra_coords={"expver": "valid_time"}, coords_as_attributes=[ "surface", "depthBelowLandLayer", @@ -515,21 +304,6 @@ def open_with_grib_conventions( add_finalizer(ds, grib_file) def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Dataset: - """ - Expand missing dimensions while preserving dimension order. - - Parameters - ---------- - dataset : xarray.Dataset - Dataset to expand. 
- expand_dims : list of str - Dimensions that should exist in the dataset. - - Returns - ------- - xarray.Dataset - Dataset with missing dimensions inserted in a stable order. - """ dims_required = [ c for c in dataset.coords if c in expand_dims + list(dataset.dims) ] @@ -542,7 +316,6 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase return dataset logger.debug("Converting grib file to netcdf format") - # Variables and dimensions to rename if they exist in the dataset rename_vars = { "time": "forecast_reference_time", "step": "forecast_period", @@ -552,7 +325,6 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase rename_vars = {k: v for k, v in rename_vars.items() if k in ds} ds = ds.rename(rename_vars) - # safely expand dimensions in an xarray dataset to ensure that data for the new dimensions are in the dataset ds = safely_expand_dims(ds, ["valid_time", "pressure_level", "model_level"]) return ds @@ -561,33 +333,14 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase def retrieve_data( product: str, chunks: dict[str, int] | None = None, - tmpdir: str | Path | None = None, + tmpdir: PathLike | None = None, lock: SerializableLock | None = None, - **updates, + **updates: Any, ) -> xr.Dataset: - """ - Retrieve ERA5 data from the Climate Data Store. - - Parameters - ---------- - product : str - CDS product name. - chunks : dict, optional - Chunk specification passed to xarray. - tmpdir : str | Path, optional - Directory used for temporary downloads. - lock : dask.utils.SerializableLock, optional - Lock used while writing downloaded files. - **updates - Additional CDS request parameters. Must include ``year``, ``month``, - and ``variable``. - - Returns - ------- - xarray.Dataset - Dataset containing the requested variables. 
- """ - request = {"product_type": ["reanalysis"], "download_format": "unarchived"} + request: dict[str, Any] = { + "product_type": ["reanalysis"], + "download_format": "unarchived", + } request.update(updates) assert {"year", "month", "variable"}.issubset(request), ( @@ -602,73 +355,44 @@ def retrieve_data( result = client.retrieve(product, request) if lock is None: - lock = nullcontext() + lock = nullcontext() # type: ignore[assignment] - suffix = f".{request['data_format']}" # .netcdf or .grib - with lock: + suffix = f".{request['data_format']}" + with lock: # type: ignore[union-attr] fd, target = mkstemp(suffix=suffix, dir=tmpdir) os.close(fd) - # Inform user about data being downloaded as "* variable (year-month)" timestr = f"{request['year']}-{request['month']}" variables = atleast_1d(request["variable"]) varstr = "\n\t".join([f"{v} ({timestr})" for v in variables]) logger.info(f"CDS: Downloading variables\n\t{varstr}\n") result.download(target) - # Convert from grib to netcdf locally, same conversion as in CDS backend if request["data_format"] == "grib": ds = open_with_grib_conventions(target, chunks=chunks, tmpdir=tmpdir) else: ds = xr.open_dataset(target, chunks=sanitize_chunks(chunks)) if tmpdir is None: - add_finalizer(target) + add_finalizer(ds, target) return ds def get_data( - cutout, - feature, - tmpdir, - lock=None, - data_format="grib", - monthly_requests=False, - concurrent_requests=False, - **creation_parameters, -): - """ - Retrieve and format ERA5 data for a cutout feature. - - Parameters - ---------- - cutout : atlite.Cutout - Cutout defining the requested spatiotemporal domain. - feature : str - Feature name defined in :data:`atlite.datasets.era5.features`. - tmpdir : str | Path - Directory used for temporary files. - lock : dask.utils.SerializableLock, optional - Lock used while writing downloaded files. - data_format : str, optional - Download format, typically ``"grib"`` or ``"netcdf"``. 
- monthly_requests : bool, optional - Whether to split requests by month. - concurrent_requests : bool, optional - Whether monthly requests should be submitted concurrently. - **creation_parameters - Additional creation options. Supports ``sanitize``. - - Returns - ------- - xarray.Dataset - Dataset containing the requested feature variables. - """ + cutout: Any, + feature: str, + tmpdir: PathLike, + lock: SerializableLock | None = None, + data_format: Literal["grib", "netcdf"] = "grib", + monthly_requests: bool = False, + concurrent_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: coords = cutout.coords sanitize = creation_parameters.get("sanitize", True) - retrieval_params = { + retrieval_params: ERA5RetrievalParams = { "product": "reanalysis-era5-single-levels", "area": _area(coords), "chunks": cutout.chunks, @@ -678,38 +402,32 @@ def get_data( "data_format": data_format, } - func = globals().get(f"get_data_{feature}") - sanitize_func = globals().get(f"sanitize_{feature}") + func: Callable[[ERA5RetrievalParams], xr.Dataset] | None = globals().get( + f"get_data_{feature}" + ) + sanitize_func: Callable[[xr.Dataset], xr.Dataset] | None = globals().get( + f"sanitize_{feature}" + ) logger.info(f"Requesting data for feature {feature}...") - def retrieve_once(time): - """ - Retrieve and optionally sanitize one temporal ERA5 request. - - Parameters - ---------- - time : dict - Time selection arguments for a single CDS request. - - Returns - ------- - xarray.Dataset - Retrieved dataset for the requested time slice. 
- """ - ds = func({**retrieval_params, **time}) + def retrieve_once(time: dict[str, Any]) -> xr.Dataset: + ds = func({**retrieval_params, **time}) # type: ignore[misc, typeddict-item] if sanitize and sanitize_func is not None: ds = sanitize_func(ds) return ds if feature in static_features: - return retrieve_once(retrieval_times(coords, static=True)).squeeze() + static_times = retrieval_times(coords, static=True) + assert isinstance(static_times, dict) + return retrieve_once(static_times).squeeze() # type: ignore[no-any-return] time_chunks = retrieval_times(coords, monthly_requests=monthly_requests) + assert isinstance(time_chunks, list) if concurrent_requests: delayed_datasets = [delayed(retrieve_once)(chunk) for chunk in time_chunks] datasets = compute(*delayed_datasets) else: datasets = map(retrieve_once, time_chunks) - return xr.concat(datasets, dim="time").sel(time=coords["time"]) + return xr.concat(datasets, dim="time").sel(time=coords["time"]) # type: ignore[no-any-return] diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 31ce30b8..154ef62d 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -7,37 +7,27 @@ Module for loading gebco data. """ +from __future__ import annotations + import logging +from typing import Any import rasterio as rio import xarray as xr from pandas import to_numeric from rasterio.warp import Resampling +from atlite._types import PathLike + logger = logging.getLogger(__name__) crs = 4326 features = {"height": ["height"]} -def get_data_gebco_height(xs, ys, gebco_path): - """ - Load GEBCO height data for a target grid. - - Parameters - ---------- - xs : xarray.DataArray - X coordinates of the target grid. - ys : xarray.DataArray - Y coordinates of the target grid. - gebco_path : str or path-like - Path to the GEBCO raster file. - - Returns - ------- - xarray.DataArray - Height data on the target grid. 
- """ +def get_data_gebco_height( + xs: xr.DataArray, ys: xr.DataArray, gebco_path: PathLike +) -> xr.DataArray: x, X = xs.data[[0, -1]] y, Y = ys.data[[0, -1]] @@ -52,7 +42,7 @@ def get_data_gebco_height(xs, ys, gebco_path): out_shape=(len(ys), len(xs)), resampling=Resampling.average, ) - gebco = gebco[::-1] # change inversed y-axis + gebco = gebco[::-1] tags = dataset.tags(bidx=1) tags = {k: to_numeric(v, errors="ignore") for k, v in tags.items()} @@ -62,42 +52,19 @@ def get_data_gebco_height(xs, ys, gebco_path): def get_data( - cutout, - feature, - tmpdir, - monthly_requests=False, - concurrent_requests=False, - **creation_parameters, -): - """ - Get the gebco height data. - - Parameters - ---------- - cutout : atlite.Cutout - feature : str - Takes no effect, only here for consistency with other dataset modules. - tmpdir : str - Takes no effect, only here for consistency with other dataset modules. - monthly_requests : bool - Takes no effect, only here for consistency with other dataset modules. - concurrent_requests : bool - Takes no effect, only here for consistency with other dataset modules. - **creation_parameters : - Must include `gebco_path`. - - Returns - ------- - xr.Dataset - - """ + cutout: Any, + feature: str, + tmpdir: PathLike, + monthly_requests: bool = False, + concurrent_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: if "gebco_path" not in creation_parameters: logger.error('Argument "gebco_path" not defined') path = creation_parameters["gebco_path"] coords = cutout.coords - # assign time dimension even if not used - return ( + return ( # type: ignore[no-any-return] get_data_gebco_height(coords["x"], coords["y"], path) .to_dataset() .assign_coords(cutout.coords) diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 8e9d91d1..8793ce4e 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -12,36 +12,26 @@ for the time being! 
""" +from __future__ import annotations + import glob import os +from collections.abc import Generator +from typing import Any import numpy as np import pandas as pd import xarray as xr -engine = "pynio" -crs = 4326 - - -def convert_lons_lats_ncep(ds, xs, ys): - """ - Subset and rename NCEP longitude and latitude coordinates. - - Parameters - ---------- - ds : xarray.Dataset - Input dataset with ``lon_0`` and ``lat_0`` coordinates. - xs : slice or array-like - Longitude selection in degrees east. - ys : slice or array-like - Latitude selection in degrees north. - - Returns - ------- - xarray.Dataset - Dataset restricted to the requested area with coordinates renamed to - ``x`` and ``y`` and duplicated as ``lon`` and ``lat``. - """ +from atlite._types import PathLike + +engine: str = "pynio" +crs: int = 4326 + + +def convert_lons_lats_ncep( + ds: xr.Dataset, xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any] +) -> xr.Dataset: if not isinstance(xs, slice): first, second, last = np.asarray(xs)[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) @@ -51,7 +41,6 @@ def convert_lons_lats_ncep(ds, xs, ys): ds = ds.sel(lat_0=ys) - # Lons should go from -180. to +180. if len(ds.coords["lon_0"].sel(lon_0=slice(xs.start + 360.0, xs.stop + 360.0))): ds = xr.concat( [ds.sel(lon_0=slice(xs.start + 360.0, xs.stop + 360.0)), ds.sel(lon_0=xs)], @@ -72,23 +61,7 @@ def convert_lons_lats_ncep(ds, xs, ys): return ds -def convert_time_hourly_ncep(ds, drop_time_vars=True): - """ - Convert NCEP time coordinates to a single hourly time index. - - Parameters - ---------- - ds : xarray.Dataset - Input dataset with ``initial_time0_hours`` and ``forecast_time0``. - drop_time_vars : bool, default True - Whether to drop auxiliary initial time variables after stacking. - - Returns - ------- - xarray.Dataset - Dataset with a stacked ``time`` coordinate. 
- """ - # Combine initial_time0 and forecast_time0 +def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr.Dataset: ds = ds.stack(time=("initial_time0_hours", "forecast_time0")).assign_coords( time=np.ravel( ds.coords["initial_time0_hours"] @@ -101,106 +74,50 @@ def convert_time_hourly_ncep(ds, drop_time_vars=True): return ds -def convert_unaverage_ncep(ds): - """ - Convert averaged NCEP variables to per-step values. - - Parameters - ---------- - ds : xarray.Dataset - Dataset containing variables with names ending in ``_avg``. - - Returns - ------- - xarray.Dataset - Dataset with unaveraged variables stored without the ``_avg`` suffix. - """ - # the fields ending in _avg contain averages which have to be unaveraged by using - # \begin{equation} - # \tilde x_1 = x_1 \quad \tilde x_i = i \cdot x_i - (i - 1) \cdot x_{i-1} \quad \forall i > 1 - # \end{equation} - - def unaverage(da, dim="forecast_time0"): +def convert_unaverage_ncep(ds: xr.Dataset) -> xr.Dataset: + def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: coords = da.coords[dim] y = da * xr.DataArray( np.arange(1, len(coords) + 1), dims=[dim], coords={dim: coords} ) - return y - y.shift(**{dim: 1}).fillna(0.0) + return y - y.shift(**{dim: 1}).fillna(0.0) # type: ignore[no-any-return, arg-type] for k, da in ds.items(): + assert isinstance(k, str) if k.endswith("_avg"): ds[k[: -len("_avg")]] = unaverage(da) ds = ds.drop(k) - return ds + return ds # type: ignore[return-value] -def convert_unaccumulate_ncep(ds): - """ - Convert accumulated NCEP variables to incremental values. - - Parameters - ---------- - ds : xarray.Dataset - Dataset containing variables with names ending in ``_acc``. - - Returns - ------- - xarray.Dataset - Dataset with unaccumulated variables stored without the ``_acc`` suffix. 
- """ - # the fields ending in _acc contain values that are accumulated over the - # forecast_time which have to be unaccumulated by using: - # \begin{equation} - # \tilde x_1 = x_1 - # \tilde x_i = x_i - x_{i-1} \forall 1 < i <= 6 - # \end{equation} - # Source: - # http://rda.ucar.edu/datasets/ds094.1/#docs/FAQs_hrly_timeseries.html - - def unaccumulate(da, dim="forecast_time0"): - return da - da.shift(**{dim: 1}).fillna(0.0) +def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: + def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: + return da - da.shift(**{dim: 1}).fillna(0.0) # type: ignore[no-any-return, arg-type] for k, da in ds.items(): + assert isinstance(k, str) if k.endswith("_acc"): ds[k[: -len("_acc")]] = unaccumulate(da) ds = ds.drop(k) - return ds - + return ds # type: ignore[return-value] -def convert_clip_lower(ds, variable, a_min, value): - """ - Set values of `variable` that are below `a_min` to `value`. - Similar to `numpy.clip`. - """ +def convert_clip_lower( + ds: xr.Dataset, variable: str, a_min: float, value: float +) -> xr.Dataset: ds[variable] = ds[variable].where(ds[variable] > a_min).fillna(value) return ds -def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly 10 m wind speed data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing ``wnd10m``. 
- """ +def prepare_wnd10m_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -211,94 +128,47 @@ def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_influx_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly downward shortwave flux data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing ``influx``. - """ +def prepare_influx_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) ds = convert_time_hourly_ncep(ds) ds = ds.rename({"DSWRF_P8_L1_GGA0": "influx"}) - # clipping random fluctuations around zero ds = convert_clip_lower(ds, "influx", a_min=0.1, value=0.0) yield yearmonth, ds -def prepare_outflux_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly upward shortwave flux data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - engine : str, default ``engine`` - Xarray backend engine. 
- - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing ``outflux``. - """ +def prepare_outflux_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) ds = convert_time_hourly_ncep(ds) ds = ds.rename({"USWRF_P8_L1_GGA0": "outflux"}) - # clipping random fluctuations around zero ds = convert_clip_lower(ds, "outflux", a_min=3.0, value=0.0) yield yearmonth, ds -def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly near-surface temperature data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing ``temperature``. - """ +def prepare_temperature_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -307,29 +177,13 @@ def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly soil temperature data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. 
- xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing - ``soil temperature``. - """ +def prepare_soil_temperature_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -338,31 +192,15 @@ def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly runoff data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing ``runoff``. 
- """ +def prepare_runoff_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) - # runoff has missing values: set nans to 0 ds = ds.fillna(0.0) ds = convert_unaccumulate_ncep(ds) ds = convert_time_hourly_ncep(ds) @@ -371,28 +209,13 @@ def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): - """ - Prepare static height data from NCEP for multiple months. - - Parameters - ---------- - fn : str or path-like - Source file path. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - yearmonths : list[tuple[int, int]] - Year-month pairs to attach to the static dataset. - engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - Each requested year-month together with a dataset containing ``height``. - """ +def prepare_height_ncep( + fn: PathLike, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"HGT_P0_L105_GGA0": "height"}) @@ -400,28 +223,13 @@ def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): yield ym, ds -def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): - """ - Prepare monthly surface roughness data from NCEP. - - Parameters - ---------- - fn : str or path-like - Source file path. - yearmonth : tuple[int, int] - Target year and month. - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. 
- engine : str, default ``engine`` - Xarray backend engine. - - Yields - ------ - tuple[tuple[int, int], xarray.Dataset] - The selected month together with a dataset containing ``roughness``. - """ +def prepare_roughness_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"SFCR_P8_L1_GGA0": "roughness"}) @@ -430,35 +238,15 @@ def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_meta_ncep( - xs, ys, year, month, template, height_config, module, engine=engine -): - """ - Prepare NCEP metadata for a cutout month. - - Parameters - ---------- - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - year : int - Target year. - month : int - Target month. - template : str - File name template for the reference dataset. - height_config : dict - Height dataset configuration. - module : module - Dataset module namespace. - engine : str, default ``engine`` - Xarray backend engine. - - Returns - ------- - xarray.Dataset - Metadata dataset with spatial, temporal, and height information. - """ + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + year: int, + month: int, + template: str, + height_config: dict[str, Any], + module: Any, + engine: str = engine, +) -> xr.Dataset: fn = next(glob.iglob(template.format(year=year, month=month))) with xr.open_dataset(fn, engine=engine) as ds: ds = ds.coords.to_dataset() @@ -479,33 +267,17 @@ def prepare_meta_ncep( meta["height"] = ds["height"] - return meta - - -def tasks_monthly_ncep(xs, ys, yearmonths, prepare_func, template, meta_attrs): - """ - Create monthly NCEP preparation task specifications. - - Parameters - ---------- - xs : slice or array-like - Longitude selection. 
- ys : slice or array-like - Latitude selection. - yearmonths : list[tuple[int, int]] - Requested year-month pairs. - prepare_func : callable - Preparation function to execute for each task. - template : str - File name template for monthly input files. - meta_attrs : dict - Unused metadata attributes. - - Returns - ------- - list[dict] - Task dictionaries for monthly data preparation. - """ + return meta # type: ignore[no-any-return] + + +def tasks_monthly_ncep( + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + meta_attrs: dict[str, Any], +) -> list[dict[str, Any]]: return [ dict( prepare_func=prepare_func, @@ -520,33 +292,14 @@ def tasks_monthly_ncep(xs, ys, yearmonths, prepare_func, template, meta_attrs): def tasks_height_ncep( - xs, ys, yearmonths, prepare_func, template, meta_attrs, **extra_args -): - """ - Create NCEP task specifications for static height data. - - Parameters - ---------- - xs : slice or array-like - Longitude selection. - ys : slice or array-like - Latitude selection. - yearmonths : list[tuple[int, int]] - Requested year-month pairs. - prepare_func : callable - Preparation function to execute. - template : str - File name template for the static height file. - meta_attrs : dict - Unused metadata attributes. - **extra_args - Additional keyword arguments passed to ``prepare_func``. - - Returns - ------- - list[dict] - A single task dictionary covering all requested months. 
- """ + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + meta_attrs: dict[str, Any], + **extra_args: Any, +) -> list[dict[str, Any]]: return [ dict( prepare_func=prepare_func, @@ -559,78 +312,90 @@ def tasks_height_ncep( ] -weather_data_config = { - "influx": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_influx_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/dswsfc.*.grb2", +weather_data_config: dict[str, dict[str, Any]] = {} +try: + from atlite import config # type: ignore[attr-defined] # noqa: F401 + + weather_data_config = { + "influx": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_influx_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/dswsfc.*.grb2", + ), ), - ), - "outflux": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_outflux_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/uswsfc.*.grb2", + "outflux": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_outflux_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/uswsfc.*.grb2", + ), ), - ), - "temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/tmp2m.*.grb2", + "temperature": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_temperature_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/tmp2m.*.grb2", + ), ), - ), - "soil temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_soil_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/soilt1.*.grb2", + "soil temperature": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_soil_temperature_ncep, + template=os.path.join( + 
config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/soilt1.*.grb2", + ), ), - ), - "wnd10m": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_wnd10m_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/wnd10m.*.grb2", + "wnd10m": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_wnd10m_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/wnd10m.*.grb2", + ), ), - ), - "runoff": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_runoff_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/runoff.*.grb2", + "runoff": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_runoff_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/runoff.*.grb2", + ), ), - ), - "roughness": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_roughness_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/flxf.gdas.*.grb2", + "roughness": dict( + tasks_func=tasks_monthly_ncep, + prepare_func=prepare_roughness_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "{year}{month:0>2}/flxf.gdas.*.grb2", + ), + ), + "height": dict( + tasks_func=tasks_height_ncep, + prepare_func=prepare_height_ncep, + template=os.path.join( + config.ncep_dir, # noqa: F821 + "height/cdas1.20130101.splgrbanl.grb2", + ), ), - ), - "height": dict( - tasks_func=tasks_height_ncep, - prepare_func=prepare_height_ncep, + } +except ImportError: + pass + +meta_data_config: dict[str, Any] = {} +try: + from atlite import config # type: ignore[attr-defined] # noqa: F401 + + meta_data_config = dict( + prepare_func=prepare_meta_ncep, template=os.path.join( config.ncep_dir, # noqa: F821 - "height/cdas1.20130101.splgrbanl.grb2", + "{year}{month:0>2}/tmp2m.*.grb2", ), - ), -} - -meta_data_config = dict( - prepare_func=prepare_meta_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - 
"{year}{month:0>2}/tmp2m.*.grb2", - ), - height_config=weather_data_config["height"], -) + height_config=weather_data_config["height"], + ) +except (ImportError, KeyError): + pass diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index ec7851ff..14376b87 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -6,17 +6,21 @@ dataset. """ +from __future__ import annotations + import glob import logging import os import warnings from functools import partial +from typing import Any import numpy as np import pandas as pd import xarray as xr from rasterio.warp import Resampling +from atlite._types import PathLike from atlite.gis import regrid from atlite.pv.solar_position import SolarPosition @@ -36,29 +40,11 @@ "solar_azimuth", ], } -static_features = {} - - -def get_filenames(sarah_dir, coords): - """ - Get all files in directory `sarah_dir` relevent for coordinates `coords`. - - This function parses all files in the sarah directory which lay in the time - span of the coordinates. - - Parameters - ---------- - sarah_dir : str - coords : atlite.Cutout.coords +static_features: dict[str, list[str]] = {} - Returns - ------- - pd.DataFrame with two columns `sis` and `sid` for and timeindex for all - relevant files. 
- """ - - def _filenames_starting_with(name): +def get_filenames(sarah_dir: str | PathLike, coords: dict[str, Any]) -> pd.DataFrame: + def _filenames_starting_with(name: str) -> pd.Series[str]: pattern = os.path.join(sarah_dir, "**", f"{name}*.nc") files = pd.Series(glob.glob(pattern, recursive=True)) assert not files.empty, ( @@ -75,7 +61,6 @@ def _filenames_starting_with(name): axis=1, ) - # SARAH files are named based on day, need to .floor("D") to compare correctly start = coords["time"].to_index()[0].floor("D") end = coords["time"].to_index()[-1].floor("D") @@ -88,30 +73,24 @@ def _filenames_starting_with(name): return files.loc[(files.index >= start) & (files.index <= end)].sort_index() -def interpolate(ds, dim="time"): - """ - Interpolate NaNs in a dataset along a chunked dimension. - - This function is similar to xr.Dataset.interpolate_na but can be - used for interpolating along a chunked dimensions (default 'time''). - As the sarah data has mulitple NaN's in the areas of dawn and - nightfall and the data is per default chunked along the time axis, - use this function to interpolate. 
- """ - - def _interpolate1d(y): +def interpolate( + ds: xr.Dataset | xr.DataArray, dim: str = "time" +) -> xr.Dataset | xr.DataArray: + def _interpolate1d( + y: np.ndarray[Any, np.dtype[np.floating[Any]]], + ) -> np.ndarray[Any, np.dtype[np.floating[Any]]]: nan = np.isnan(y) if nan.all() or not nan.any(): return y - def x(z): + def x(z: np.ndarray[Any, np.dtype[Any]]) -> np.ndarray[Any, np.dtype[np.intp]]: return z.nonzero()[0] y = np.array(y) y[nan] = np.interp(x(nan), x(~nan), y[~nan]) return y - def _interpolate(a): + def _interpolate(a: Any) -> Any: return a.map_blocks( partial(np.apply_along_axis, _interpolate1d, -1), dtype=a.dtype ) @@ -120,7 +99,7 @@ def _interpolate(a): dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "interpolate only supports datasets with homogeneous dtype" - return xr.apply_ufunc( + return xr.apply_ufunc( # type: ignore[no-any-return] _interpolate, ds, input_core_dims=[[dim]], @@ -132,23 +111,14 @@ def _interpolate(a): ) -def as_slice(bounds, pad=True): - """ - Convert coordinate bounds to slice and pad by 0.01. - """ +def as_slice(bounds: slice | tuple[float, float], pad: bool = True) -> slice: if not isinstance(bounds, slice): - bounds = bounds + (-0.01, 0.01) + bounds = bounds + (-0.01, 0.01) # type: ignore[assignment] bounds = slice(*bounds) return bounds -def hourly_mean(ds): - """ - Resample time data to one hour frequency. - - In contrast to the standard xarray resample function this preserves - chunks sizes along the time dimension. - """ +def hourly_mean(ds: xr.Dataset) -> xr.Dataset: ds1 = ds.isel(time=slice(None, None, 2)) ds2 = ds.isel(time=slice(1, None, 2)) ds2 = ds2.assign_coords(time=ds2.indexes["time"] - pd.Timedelta(30, "m")) @@ -160,40 +130,13 @@ def hourly_mean(ds): def get_data( - cutout, feature, tmpdir, lock=None, monthly_requests=False, **creation_parameters -): - """ - Load stored SARAH data and reformat to matching the given cutout. 
- - This function loads and resamples the stored SARAH data for a given - `atlite.Cutout`. - - Parameters - ---------- - cutout : atlite.Cutout - feature : str - Name of the feature data to retrieve. Must be in - `atlite.datasets.sarah.features` - monthly_requests : bool - Takes no effect, only here for consistency with other dataset modules. - concurrent_requests : bool - Takes no effect, only here for consistency with other dataset modules. - **creation_parameters : - Mandatory arguments are: - * 'sarah_dir', str. Directory of the stored SARAH data. - Possible arguments are: - * 'parallel', bool. Whether to load stored files in parallel - mode. Default is False. - * 'sarah_interpolate', bool. Whether to interpolate areas of dawn - and nightfall. This might slow down the loading process if only - a few cores are available. Default is True. - - Returns - ------- - xarray.Dataset - Dataset of dask arrays of the retrieved variables. - - """ + cutout: Any, + feature: str, + tmpdir: PathLike, + lock: Any = None, + monthly_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: assert cutout.dt in ("30min", "30T", "h", "1h") coords = cutout.coords @@ -209,12 +152,10 @@ def get_data( ds_sid = xr.open_mfdataset(files.sid, combine="by_coords", **open_kwargs)[["SID"]] ds = xr.merge([ds_sis, ds_sid]) ds = ds.sel(lon=as_slice(cutout.extent[:2]), lat=as_slice(cutout.extent[2:])) - # fix float (im)precission ds = ds.assign_coords( lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - # Interpolate, resample and possible regrid if creation_parameters["sarah_interpolate"]: ds = interpolate(ds) else: @@ -233,7 +174,6 @@ def get_data( ds = ds.swap_dims({"lon": "x", "lat": "y"}) - # Do not show DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) sp = SolarPosition(ds, time_shift="0H") @@ -241,4 +181,4 @@ def get_data( ds = xr.merge([ds, sp]) - return ds + 
return ds # type: ignore[no-any-return] diff --git a/atlite/gis.py b/atlite/gis.py index 63040408..9092d983 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -5,10 +5,14 @@ Functions for Geographic Information System. """ +from __future__ import annotations + import logging import multiprocessing as mp from collections import OrderedDict +from collections.abc import Callable, Iterable, Sequence from pathlib import Path +from typing import TYPE_CHECKING, Any, cast from warnings import catch_warnings, simplefilter import geopandas as gpd @@ -30,10 +34,32 @@ from shapely.strtree import STRtree from tqdm import tqdm +from atlite._types import ( + CrsLike, + DataArray, + Dataset, + GeoDataFrame, + Geometry, + GeoSeries, + NDArray, + PathLike, +) + +if TYPE_CHECKING: + from matplotlib.axes import Axes + logger = logging.getLogger(__name__) -def get_coords(x, y, time, dx=0.25, dy=0.25, dt="h", **kwargs): +def get_coords( + x: slice, + y: slice, + time: slice, + dx: float = 0.25, + dy: float = 0.25, + dt: str = "h", + **kwargs: Any, +) -> Dataset: """ Create cutout coordinates from slices and resolutions. @@ -71,10 +97,10 @@ def get_coords(x, y, time, dx=0.25, dy=0.25, dt="h", **kwargs): ) ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.sel(x=x, y=y, time=time) - return ds + return cast(Dataset, ds) -def spdiag(v): +def spdiag(v: NDArray | Sequence[float]) -> sp.sparse.csr_matrix: """ Create a sparse diagonal matrix. @@ -93,7 +119,11 @@ def spdiag(v): return sp.sparse.csr_matrix((v, inds[:-1], inds), (N, N)) -def reproject_shapes(shapes, crs1, crs2): +def reproject_shapes( + shapes: Iterable[Geometry] | pd.Series | dict[Any, Geometry], + crs1: CrsLike, + crs2: CrsLike, +) -> Iterable[Geometry] | pd.Series | OrderedDict[Any, Geometry]: """ Reproject a collection of geometries. 
@@ -113,7 +143,7 @@ def reproject_shapes(shapes, crs1, crs2): """ transformer = Transformer.from_crs(crs1, crs2, always_xy=True) - def _reproject_shape(shape): + def _reproject_shape(shape: Geometry) -> Geometry: return transform(transformer.transform, shape) if isinstance(shapes, pd.Series): @@ -124,7 +154,12 @@ def _reproject_shape(shape): return list(map(_reproject_shape, shapes)) -def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): +def compute_indicatormatrix( + orig: GeoDataFrame | GeoSeries | Iterable[Geometry], + dest: GeoDataFrame | GeoSeries | Iterable[Geometry], + orig_crs: CrsLike = 4326, + dest_crs: CrsLike = 4326, +) -> sp.sparse.lil_matrix: """ Compute the indicatormatrix. @@ -151,15 +186,21 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): orig = orig.geometry if isinstance(orig, gpd.GeoDataFrame) else orig dest = dest.geometry if isinstance(dest, gpd.GeoDataFrame) else dest dest = reproject_shapes(dest, dest_crs, orig_crs) - indicator = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float) - tree = STRtree(orig) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig)) + orig_list: list[Any] | pd.Series = ( + list(orig) if not isinstance(orig, pd.Series) else orig + ) + dest_list: list[Any] | pd.Series = ( + list(dest) if not isinstance(dest, pd.Series) else dest + ) + indicator = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) + tree = STRtree(orig_list) + idx = dict((hash(o.wkt), i) for i, o in enumerate(orig_list)) - for i, d in enumerate(dest): + for i, d in enumerate(dest_list): for o in tree.query(d): # STRtree query returns a list of indices for shapely >= v2.0 if isinstance(o, (int | np.integer)): - o = orig[o] + o = orig_list[o] if o.intersects(d): j = idx[hash(o.wkt)] area = d.intersection(o).area @@ -168,7 +209,12 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): return indicator -def compute_intersectionmatrix(orig, dest, orig_crs=4326, 
dest_crs=4326): +def compute_intersectionmatrix( + orig: GeoDataFrame | GeoSeries | Iterable[Geometry], + dest: GeoDataFrame | GeoSeries | Iterable[Geometry], + orig_crs: CrsLike = 4326, + dest_crs: CrsLike = 4326, +) -> sp.sparse.lil_matrix: """ Compute the intersectionmatrix. @@ -191,22 +237,30 @@ def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): orig = orig.geometry if isinstance(orig, gpd.GeoDataFrame) else orig dest = dest.geometry if isinstance(dest, gpd.GeoDataFrame) else dest dest = reproject_shapes(dest, dest_crs, orig_crs) - intersection = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float) - tree = STRtree(orig) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig)) + orig_list: list[Any] | pd.Series = ( + list(orig) if not isinstance(orig, pd.Series) else orig + ) + dest_list: list[Any] | pd.Series = ( + list(dest) if not isinstance(dest, pd.Series) else dest + ) + intersection = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) + tree = STRtree(orig_list) + idx = dict((hash(o.wkt), i) for i, o in enumerate(orig_list)) - for i, d in enumerate(dest): + for i, d in enumerate(dest_list): for o in tree.query(d): # STRtree query returns a list of indices for shapely >= v2.0 if isinstance(o, (int | np.integer)): - o = orig[o] + o = orig_list[o] j = idx[hash(o.wkt)] intersection[i, j] = o.intersects(d) return intersection -def padded_transform_and_shape(bounds, res): +def padded_transform_and_shape( + bounds: tuple[float, float, float, float], res: float +) -> tuple[rio.Affine, tuple[int, int]]: """ Return a padded raster transform and shape. 
@@ -229,8 +283,14 @@ def padded_transform_and_shape(bounds, res): def projected_mask( - raster, geom, transform=None, shape=None, crs=None, allow_no_overlap=False, **kwargs -): + raster: rio.DatasetReader, + geom: GeoSeries, + transform: rio.Affine | None = None, + shape: tuple[int, int] | None = None, + crs: CrsLike = None, + allow_no_overlap: bool = False, + **kwargs: Any, +) -> tuple[NDArray, rio.Affine]: """ Load a raster mask and optionally reproject it. @@ -275,7 +335,7 @@ def projected_mask( return masked, transform_ assert shape is not None and crs is not None - return rio.warp.reproject( + return rio.warp.reproject( # type: ignore[no-any-return] masked, empty(shape), src_crs=raster.crs, @@ -286,7 +346,14 @@ def projected_mask( ) -def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): +def pad_extent( + src: NDArray, + src_transform: rio.Affine, + dst_transform: rio.Affine, + src_crs: CrsLike, + dst_crs: CrsLike, + **kwargs: Any, +) -> tuple[NDArray, rio.Affine]: """ Pad an array before reprojection. 
@@ -313,7 +380,8 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): if src.size == 0: return src, src_transform - left, top, right, bottom = *(src_transform * (0, 0)), *(src_transform * (1, 1)) + left, top = src_transform * (0, 0) + right, bottom = src_transform * (1, 1) covered = transform_bounds(src_crs, dst_crs, left, bottom, right, top) covered_res = min(abs(covered[2] - covered[0]), abs(covered[3] - covered[1])) pad = int(dst_transform[0] // covered_res * 1.1) @@ -321,7 +389,7 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): kwargs.setdefault("mode", "constant") if src.ndim == 2: - return rio.pad(src, src_transform, pad, **kwargs) + return rio.pad(src, src_transform, pad, **kwargs) # type: ignore[no-any-return] npad = ((0, 0),) * (src.ndim - 2) + ((pad, pad), (pad, pad)) padded = np.pad(src, npad, **kwargs) @@ -331,7 +399,9 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): return padded, rio.Affine(*transform[:6]) -def shape_availability(geometry, excluder): +def shape_availability( + geometry: GeoSeries, excluder: ExclusionContainer +) -> tuple[NDArray, rio.Affine]: """ Compute the eligible area in one or more geometries. @@ -397,8 +467,12 @@ def shape_availability(geometry, excluder): def shape_availability_reprojected( - geometry, excluder, dst_transform, dst_crs, dst_shape -): + geometry: GeoSeries, + excluder: ExclusionContainer, + dst_transform: rio.Affine, + dst_crs: CrsLike, + dst_shape: tuple[int, int], +) -> tuple[NDArray, rio.Affine]: """ Compute availability and reproject it to a target raster. 
@@ -424,7 +498,7 @@ def shape_availability_reprojected( masked, transform = pad_extent( masked, transform, dst_transform, excluder.crs, dst_crs ) - return rio.warp.reproject( + return rio.warp.reproject( # type: ignore[no-any-return] masked.astype(np.uint8), empty(dst_shape), resampling=rio.warp.Resampling.average, @@ -440,7 +514,7 @@ class ExclusionContainer: Container for exclusion objects and meta data. """ - def __init__(self, crs=3035, res=100): + def __init__(self, crs: CrsLike = 3035, res: float = 100) -> None: """ Initialize a container for excluded areas. @@ -456,21 +530,21 @@ def __init__(self, crs=3035, res=100): The default is 100. """ - self.rasters = [] - self.geometries = [] - self.crs = crs - self.res = res + self.rasters: list[dict[str, Any]] = [] + self.geometries: list[dict[str, Any]] = [] + self.crs: CrsLike = crs + self.res: float = res def add_raster( self, - raster, - codes=None, - buffer=0, - invert=False, - nodata=255, - allow_no_overlap=False, - crs=None, - ): + raster: PathLike | rio.DatasetReader, + codes: int | list[int] | Callable[[NDArray], NDArray] | None = None, + buffer: int = 0, + invert: bool = False, + nodata: int = 255, + allow_no_overlap: bool = False, + crs: CrsLike = None, + ) -> None: """ Register a raster to the ExclusionContainer. @@ -499,7 +573,7 @@ def add_raster( CRS of the raster. Specify this if the raster has invalid crs. """ - d = dict( + d: dict[str, Any] = dict( raster=raster, codes=codes, buffer=buffer, @@ -510,7 +584,12 @@ def add_raster( ) self.rasters.append(d) - def add_geometry(self, geometry, buffer=0, invert=False): + def add_geometry( + self, + geometry: PathLike | GeoDataFrame | GeoSeries, + buffer: float = 0, + invert: bool = False, + ) -> None: """ Register a collection of geometries to the ExclusionContainer. @@ -526,10 +605,10 @@ def add_geometry(self, geometry, buffer=0, invert=False): of the geometries. The default is False. 
""" - d = dict(geometry=geometry, buffer=buffer, invert=invert) + d: dict[str, Any] = dict(geometry=geometry, buffer=buffer, invert=invert) self.geometries.append(d) - def open_files(self): + def open_files(self) -> None: """ Open rasters and load geometries. """ @@ -563,7 +642,7 @@ def open_files(self): d["geometry"] = geometry @property - def all_closed(self): + def all_closed(self) -> bool: """ Check whether all files in the raster container are closed. """ @@ -572,7 +651,7 @@ def all_closed(self): ) @property - def all_open(self): + def all_open(self) -> bool: """ Check whether all files in the raster container are open. """ @@ -580,7 +659,7 @@ def all_open(self): isinstance(d["raster"], rio.DatasetReader) for d in self.rasters ) and all(isinstance(d["geometry"], gpd.GeoSeries) for d in self.geometries) - def __repr__(self): + def __repr__(self) -> str: return ( f"Exclusion Container" f"\n registered rasters: {len(self.rasters)} " @@ -589,8 +668,12 @@ def __repr__(self): ) def compute_shape_availability( - self, geometry, dst_transform=None, dst_crs=None, dst_shape=None - ): + self, + geometry: GeoDataFrame | GeoSeries, + dst_transform: rio.Affine | None = None, + dst_crs: CrsLike = None, + dst_shape: tuple[int, int] | None = None, + ) -> tuple[NDArray, rio.Affine]: """ Compute the eligible area in one or more geometries and optionally reproject. @@ -633,6 +716,11 @@ def compute_shape_availability( "Arguments dst_transform, dst_crs, dst_shape " "should be all None or all defined." 
) + assert ( + dst_transform is not None + and dst_crs is not None + and dst_shape is not None + ) return shape_availability_reprojected( geometry, self, dst_transform, dst_crs, dst_shape ) @@ -641,15 +729,15 @@ def compute_shape_availability( def plot_shape_availability( self, - geometry, - ax=None, - set_title=True, - dst_transform=None, - dst_crs=None, - dst_shape=None, - show_kwargs={}, - plot_kwargs={}, - ): + geometry: GeoDataFrame | GeoSeries, + ax: Axes | None = None, + set_title: bool = True, + dst_transform: rio.Affine | None = None, + dst_crs: CrsLike = None, + dst_shape: tuple[int, int] | None = None, + show_kwargs: dict[str, Any] | None = None, + plot_kwargs: dict[str, Any] | None = None, + ) -> Axes: """ Plot the eligible area for one or more geometries and optionally reproject. @@ -689,6 +777,10 @@ def plot_shape_availability( _description_ """ + if show_kwargs is None: + show_kwargs = {} + if plot_kwargs is None: + plot_kwargs = {} import matplotlib.pyplot as plt if isinstance(geometry, gpd.GeoDataFrame): @@ -715,22 +807,43 @@ def plot_shape_availability( return ax -def _init_process(shapes_, excluder_, dst_transform_, dst_crs_, dst_shapes_): - global shapes, excluder, dst_transform, dst_crs, dst_shapes - shapes, excluder = shapes_, excluder_ - dst_transform, dst_crs, dst_shapes = dst_transform_, dst_crs_, dst_shapes_ +_mp_shapes: GeoSeries +_mp_excluder: ExclusionContainer +_mp_dst_transform: rio.Affine +_mp_dst_crs: CrsLike +_mp_dst_shapes: tuple[int, int] + + +def _init_process( + shapes_: GeoSeries, + excluder_: ExclusionContainer, + dst_transform_: rio.Affine, + dst_crs_: CrsLike, + dst_shapes_: tuple[int, int], +) -> None: + global _mp_shapes, _mp_excluder, _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes + _mp_shapes, _mp_excluder = shapes_, excluder_ + _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes = ( + dst_transform_, + dst_crs_, + dst_shapes_, + ) -def _process_func(i): - args = (excluder, dst_transform, dst_crs, dst_shapes) +def 
_process_func(i: Any) -> NDArray: + args = (_mp_excluder, _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes) with catch_warnings(): simplefilter("ignore") - return shape_availability_reprojected(shapes.loc[[i]], *args)[0] + return shape_availability_reprojected(_mp_shapes.loc[[i]], *args)[0] def compute_availabilitymatrix( - cutout, shapes, excluder, nprocesses=None, disable_progressbar=True -): + cutout: Any, + shapes: GeoDataFrame | GeoSeries, + excluder: ExclusionContainer, + nprocesses: int | None = None, + disable_progressbar: bool = True, +) -> DataArray: """ Compute the eligible share within cutout cells in the overlap with shapes. @@ -798,13 +911,12 @@ def compute_availabilitymatrix( assert excluder.all_closed, ( "For parallelization all raster files in excluder must be closed" ) - kwargs = { - "initializer": _init_process, - "initargs": (shapes, *args), - "maxtasksperchild": 20, - "processes": nprocesses, - } - with mp.get_context("spawn").Pool(**kwargs) as pool: + with mp.get_context("spawn").Pool( + processes=nprocesses, + initializer=_init_process, + initargs=(shapes, *args), + maxtasksperchild=20, + ) as pool: if disable_progressbar: availability = list(pool.map(_process_func, shapes.index)) else: @@ -812,14 +924,16 @@ def compute_availabilitymatrix( tqdm(pool.imap(_process_func, shapes.index), **tqdm_kwargs) ) - availability = np.stack(availability)[:, ::-1] # flip axis, see Notes - if availability.ndim == 4: - availability = availability.squeeze(axis=1) + availability_arr = np.stack(availability)[:, ::-1] # flip axis, see Notes + if availability_arr.ndim == 4: + availability_arr = availability_arr.squeeze(axis=1) coords = [(shapes.index), ("y", cutout.data.y.data), ("x", cutout.data.x.data)] - return xr.DataArray(availability, coords=coords) + return xr.DataArray(availability_arr, coords=coords) -def maybe_swap_spatial_dims(ds, namex="x", namey="y"): +def maybe_swap_spatial_dims( + ds: Dataset | DataArray, namex: str = "x", namey: str = "y" +) -> 
Dataset | DataArray: """ Ensure spatial coordinates follow atlite's axis ordering. @@ -846,10 +960,10 @@ def maybe_swap_spatial_dims(ds, namex="x", namey="y"): if uy < ly: swaps[namey] = slice(None, None, -1) - return ds.isel(**swaps) if swaps else ds + return ds.isel(swaps) if swaps else ds -def _as_transform(x, y): +def _as_transform(x: pd.Index, y: pd.Index) -> rio.Affine: lx, rx = x[[0, -1]] ly, uy = y[[0, -1]] @@ -859,7 +973,12 @@ def _as_transform(x, y): return rio.Affine(dx, 0, lx - dx / 2, 0, dy, ly - dy / 2) -def regrid(ds, dimx, dimy, **kwargs): +def regrid( + ds: Dataset | DataArray, + dimx: pd.Index, + dimy: pd.Index, + **kwargs: Any, +) -> Dataset | DataArray: """ Reproject data to a new spatial grid. @@ -892,7 +1011,7 @@ def regrid(ds, dimx, dimy, **kwargs): kwargs.setdefault("src_crs", CRS.from_epsg(4326)) kwargs.setdefault("dst_crs", CRS.from_epsg(4326)) - def _reproject(src, **kwargs): + def _reproject(src: NDArray, **kwargs: Any) -> NDArray: shape = src.shape[:-2] + dst_shape src, trans = pad_extent( src, @@ -909,31 +1028,34 @@ def _reproject(src, **kwargs): if reprojected.ndim != src.ndim: reprojected = reprojected.squeeze(axis=0) - return reprojected + return cast(NDArray, reprojected) data_vars = ds.data_vars.values() if isinstance(ds, xr.Dataset) else (ds,) dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "regrid can only reproject datasets with homogeneous dtype" - return ( - xr.apply_ufunc( - _reproject, - ds, - input_core_dims=[[namey, namex]], - output_core_dims=[["yout", "xout"]], - output_dtypes=[dtypes.pop()], - dask_gufunc_kwargs=dict( - output_sizes={"yout": dst_shape[0], "xout": dst_shape[1]} - ), - dask="parallelized", - kwargs=kwargs, - ) - .rename({"yout": namey, "xout": namex}) - .assign_coords( - **{ - namey: (namey, dimy.data, ds.coords[namey].attrs), - namex: (namex, dimx.data, ds.coords[namex].attrs), - } - ) - .assign_attrs(**ds.attrs) + return cast( + Dataset | DataArray, + ( + xr.apply_ufunc( + _reproject, 
+ ds, + input_core_dims=[[namey, namex]], + output_core_dims=[["yout", "xout"]], + output_dtypes=[dtypes.pop()], + dask_gufunc_kwargs=dict( + output_sizes={"yout": dst_shape[0], "xout": dst_shape[1]} + ), + dask="parallelized", + kwargs=kwargs, + ) + .rename({"yout": namey, "xout": namex}) + .assign_coords( + **{ + namey: (namey, dimy.data, ds.coords[namey].attrs), + namex: (namex, dimx.data, ds.coords[namex].attrs), + } + ) + .assign_attrs(**ds.attrs) + ), ) diff --git a/atlite/hydro.py b/atlite/hydro.py index a3734715..5fe838d6 100644 --- a/atlite/hydro.py +++ b/atlite/hydro.py @@ -5,6 +5,8 @@ Module involving hydro operations in atlite. """ +from __future__ import annotations + import logging from collections import namedtuple @@ -20,7 +22,11 @@ Basins = namedtuple("Basins", ["plants", "meta", "shapes"]) -def find_basin(shapes, lon, lat): +def find_basin( + shapes: gpd.GeoSeries, + lon: float, + lat: float, +) -> int: """ Find the basin containing a point. @@ -44,10 +50,13 @@ def find_basin(shapes, lon, lat): f"The point ({lon}, {lat}) is in several basins: {hids}. " "Assuming the first one." ) - return hids[0] + return int(hids[0]) -def find_upstream_basins(meta, hid): +def find_upstream_basins( + meta: pd.DataFrame, + hid: int, +) -> list[int]: """ Collect all upstream basins of a basin. @@ -71,7 +80,11 @@ def find_upstream_basins(meta, hid): return hids -def determine_basins(plants, hydrobasins, show_progress=False): +def determine_basins( + plants: pd.DataFrame, + hydrobasins: str | gpd.GeoDataFrame, + show_progress: bool = False, +) -> Basins: """ Determine local and upstream basins for hydro plants. 
@@ -111,7 +124,7 @@ def determine_basins(plants, hydrobasins, show_progress=False): meta = hydrobasins[hydrobasins.columns.difference(("geometry",))] shapes = hydrobasins["geometry"] - plant_basins = [] + plant_basins: list[tuple[int, list[int]]] = [] for p in tqdm( plants.itertuples(), disable=not show_progress, @@ -119,17 +132,20 @@ def determine_basins(plants, hydrobasins, show_progress=False): ): hid = find_basin(shapes, p.lon, p.lat) plant_basins.append((hid, find_upstream_basins(meta, hid))) - plant_basins = pd.DataFrame( + plant_basins_df = pd.DataFrame( plant_basins, columns=["hid", "upstream"], index=plants.index ) - unique_basins = pd.Index(plant_basins["upstream"].sum()).unique().rename("hid") - return Basins(plant_basins, meta.loc[unique_basins], shapes.loc[unique_basins]) + unique_basins = pd.Index(plant_basins_df["upstream"].sum()).unique().rename("hid") + return Basins(plant_basins_df, meta.loc[unique_basins], shapes.loc[unique_basins]) def shift_and_aggregate_runoff_for_plants( - basins, runoff, flowspeed=1, show_progress=False -): + basins: Basins, + runoff: xr.DataArray, + flowspeed: float = 1, + show_progress: bool = False, +) -> xr.DataArray: """ Shift basin runoff in time and aggregate it per plant. @@ -149,7 +165,7 @@ def shift_and_aggregate_runoff_for_plants( xarray.DataArray Plant inflow time series indexed by ``plant`` and ``time``. 
""" - inflow = xr.DataArray( + inflow: xr.DataArray = xr.DataArray( np.zeros((len(basins.plants), runoff.indexes["time"].size)), [("plant", basins.plants.index), runoff.coords["time"]], ) @@ -159,12 +175,12 @@ def shift_and_aggregate_runoff_for_plants( disable=not show_progress, desc="Shift and aggregate runoff by plant", ): - inflow_plant = inflow.loc[dict(plant=ppl.Index)] - distances = ( + inflow_plant: xr.DataArray = inflow.loc[dict(plant=ppl.Index)] + distances: pd.Series = ( basins.meta.loc[ppl.upstream, "DIST_MAIN"] - basins.meta.at[ppl.hid, "DIST_MAIN"] ) - nhours = (distances / (flowspeed * 3.6) + 0.5).astype(int) + nhours: pd.Series = (distances / (flowspeed * 3.6) + 0.5).astype(int) for b in ppl.upstream: inflow_plant += runoff.sel(hid=b).roll(time=nhours.at[b]) diff --git a/atlite/pv/__init__.py b/atlite/pv/__init__.py index c5528ebf..a533f7f9 100644 --- a/atlite/pv/__init__.py +++ b/atlite/pv/__init__.py @@ -1,6 +1,43 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -atlite pv module. 
-""" +"""Photovoltaic modeling functions.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from atlite.pv.irradiation import ( + DiffuseHorizontalIrrad, + TiltedDiffuseIrrad, + TiltedDirectIrrad, + TiltedGroundIrrad, + TiltedIrradiation, +) +from atlite.pv.orientation import ( + SurfaceOrientation, + get_orientation, + make_constant, + make_latitude, + make_latitude_optimal, +) +from atlite.pv.solar_panel_model import SolarPanelModel +from atlite.pv.solar_position import SolarPosition + +if TYPE_CHECKING: + pass + +__all__: list[str] = [ + "DiffuseHorizontalIrrad", + "TiltedDirectIrrad", + "TiltedDiffuseIrrad", + "TiltedGroundIrrad", + "TiltedIrradiation", + "SurfaceOrientation", + "get_orientation", + "make_constant", + "make_latitude", + "make_latitude_optimal", + "SolarPanelModel", + "SolarPosition", +] diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index f254e6f7..b89d844f 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -2,15 +2,21 @@ # # SPDX-License-Identifier: MIT +from __future__ import annotations + import logging import numpy as np from dask.array import cos, fmax, fmin, radians, sin, sqrt +from atlite._types import DataArray, Dataset + logger = logging.getLogger(__name__) -def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): +def DiffuseHorizontalIrrad( + ds: Dataset, solar_position: Dataset, clearsky_model: str | None, influx: DataArray +) -> DataArray: """ Estimate diffuse horizontal irradiation from total horizontal irradiation. @@ -32,11 +38,6 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): xarray.DataArray Diffuse horizontal irradiation. """ - # Clearsky model from Reindl 1990 to split downward radiation into direct - # and diffuse contributions. Should switch to more up-to-date model, f.ex. - # Ridley et al. (2010) http://dx.doi.org/10.1016/j.renene.2009.07.018 , - # Lauret et al. 
(2013):http://dx.doi.org/10.1016/j.renene.2012.01.049 - sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -45,15 +46,9 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): "enhanced" if "temperature" in ds and "humidity" in ds else "simple" ) - # Reindl 1990 clearsky model - - k = influx / influx_toa # clearsky index - # k.values[k.values > 1.0] = 1.0 - # k = k.rename('clearsky index') + k = influx / influx_toa if clearsky_model == "simple": - # Simple Reindl model without ambient air temperature and - # relative humidity fraction = ( ((k > 0.0) & (k <= 0.3)) * fmin(1.0, 1.020 - 0.254 * k + 0.0123 * sinaltitude) @@ -62,8 +57,6 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): + (k >= 0.78) * fmax(0.1, 0.486 * k - 0.182 * sinaltitude) ) elif clearsky_model == "enhanced": - # Enhanced Reindl model with ambient air temperature and relative - # humidity T = ds["temperature"] rh = ds["humidity"] @@ -87,14 +80,16 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): else: raise KeyError("`clearsky model` must be chosen from 'simple' and 'enhanced'") - # Set diffuse fraction to one when the sun isn't up - # fraction = fraction.where(sinaltitude >= sin(radians(threshold))).fillna(1.0) - # fraction = fraction.rename('fraction index') - - return (influx * fraction).rename("diffuse horizontal") + return (influx * fraction).rename("diffuse horizontal") # type: ignore[no-any-return] -def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse): +def TiltedDiffuseIrrad( + ds: Dataset, + solar_position: Dataset, + surface_orientation: Dataset, + direct: DataArray, + diffuse: DataArray, +) -> DataArray: """ Calculate diffuse irradiation on a tilted surface. @@ -116,8 +111,6 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) xarray.DataArray Diffuse tilted irradiation. 
""" - # Hay-Davies Model - sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -127,13 +120,9 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) influx = direct + diffuse with np.errstate(divide="ignore", invalid="ignore"): - # brightening factor f = sqrt(direct / influx).fillna(0.0) - - # anisotropy factor A = direct / influx_toa - # geometric factor R_b = cosincidence / sinaltitude diffuse_t = ( @@ -143,8 +132,6 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) + A * R_b ) * diffuse - # fixup: clip all negative values (unclear why it gets negative) - # note: REatlas does not do the fixup if logger.isEnabledFor(logging.WARNING): if ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any(): logger.warning( @@ -154,10 +141,12 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) - return diffuse_t.rename("diffuse tilted") + return diffuse_t.rename("diffuse tilted") # type: ignore[no-any-return] -def TiltedDirectIrrad(solar_position, surface_orientation, direct): +def TiltedDirectIrrad( + solar_position: Dataset, surface_orientation: Dataset, direct: DataArray +) -> DataArray: """ Calculate direct irradiation on a tilted surface. @@ -178,13 +167,12 @@ def TiltedDirectIrrad(solar_position, surface_orientation, direct): sinaltitude = sin(solar_position["altitude"]) cosincidence = surface_orientation["cosincidence"] - # geometric factor R_b = cosincidence / sinaltitude - return (R_b * direct).rename("direct tilted") + return (R_b * direct).rename("direct tilted") # type: ignore[no-any-return] -def _albedo(ds, influx): +def _albedo(ds: Dataset, influx: DataArray) -> DataArray: """ Retrieve or derive surface albedo from the dataset. @@ -201,19 +189,22 @@ def _albedo(ds, influx): Surface albedo. 
""" if "albedo" in ds: - albedo = ds["albedo"] + return ds["albedo"] elif "outflux" in ds: - albedo = (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) + return (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) # type: ignore[no-any-return] else: raise AssertionError( "Need either albedo or outflux as a variable in the dataset. " "Check your cutout and dataset module." ) - return albedo - -def TiltedGroundIrrad(ds, solar_position, surface_orientation, influx): +def TiltedGroundIrrad( + ds: Dataset, + solar_position: Dataset, + surface_orientation: Dataset, + influx: DataArray, +) -> DataArray: """ Calculate ground-reflected irradiation on a tilted surface. @@ -235,19 +226,19 @@ def TiltedGroundIrrad(ds, solar_position, surface_orientation, influx): """ surface_slope = surface_orientation["slope"] ground_t = influx * _albedo(ds, influx) * (1.0 - cos(surface_slope)) / 2.0 - return ground_t.rename("ground tilted") + return ground_t.rename("ground tilted") # type: ignore[no-any-return] def TiltedIrradiation( - ds, - solar_position, - surface_orientation, - trigon_model, - clearsky_model, - tracking=0, - altitude_threshold=1.0, - irradiation="total", -): + ds: Dataset, + solar_position: Dataset, + surface_orientation: Dataset, + trigon_model: str, + clearsky_model: str | None, + tracking: int | str = 0, + altitude_threshold: float = 1.0, + irradiation: str = "total", +) -> DataArray: """ Calculate the irradiation on a tilted surface. @@ -261,7 +252,8 @@ def TiltedIrradiation( surface_orientation : xarray.Dataset Surface orientation calculated using atlite.orientation.SurfaceOrientation. trigon_model : str - Type of trigonometry model. Defaults to 'simple'if used via `convert_irradiation`. + Type of trigonometry model. Defaults to 'simple' if used via + `convert_irradiation`. clearsky_model : str or None Either the 'simple' or the 'enhanced' Reindl clearsky model. 
The default choice of None will choose dependending on @@ -288,24 +280,8 @@ def TiltedIrradiation( """ influx_toa = ds["influx_toa"] - def clip(influx, influx_max): - """ - Clip irradiation to physically admissible bounds. - - Parameters - ---------- - influx : xarray.DataArray - Irradiation to clip. - influx_max : xarray.DataArray - Upper bound for the irradiation. - - Returns - ------- - xarray.DataArray - Clipped irradiation. - """ - # use .data in clip due to dask-xarray incompatibilities - return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) + def clip(influx: DataArray, influx_max: DataArray) -> DataArray: + return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) # type: ignore[no-any-return] if "influx" in ds: influx = clip(ds["influx"], influx_toa) @@ -319,6 +295,7 @@ def clip(influx, influx_max): "Need either influx or influx_direct and influx_diffuse in the " "dataset. Check your cutout and dataset module." ) + if trigon_model == "simple": k = surface_orientation["cosincidence"] / sin(solar_position["altitude"]) if tracking != "dual": @@ -351,13 +328,12 @@ def clip(influx, influx_max): result = diffuse_t.rename("diffuse tilted") elif irradiation == "ground": result = ground_t.rename("ground tilted") - - # The solar_position algorithms have a high error for small solar altitude - # values, leading to big overall errors from the 1/sinaltitude factor. - # => Suppress irradiation below solar altitudes of 1 deg. 
+ else: + msg = f"Unknown irradiation type: {irradiation}" + raise ValueError(msg) cap_alt = solar_position["altitude"] < radians(altitude_threshold) result = result.where(~(cap_alt | (direct + diffuse <= 0.01)), 0) result.attrs["units"] = "W m**-2" - return result + return result # type: ignore[no-any-return] diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index fb4f9e08..0a746e07 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -2,15 +2,23 @@ # # SPDX-License-Identifier: MIT +from __future__ import annotations + import sys +from collections.abc import Callable +from typing import Any import numpy as np import xarray as xr from dask.array import arccos, arcsin, arctan, cos, logical_and, radians, sin from numpy import pi +from atlite._types import Dataset, NumericArray + -def get_orientation(name, **params): +def get_orientation( + name: str | dict[str, Any], **params: Any +) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: """ Return an orientation factory by name. @@ -29,10 +37,15 @@ def get_orientation(name, **params): if isinstance(name, dict): params = name name = params.pop("name", "constant") - return getattr(sys.modules[__name__], f"make_{name}")(**params) + result: Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]] = ( + getattr(sys.modules[__name__], f"make_{name}")(**params) + ) + return result -def make_latitude_optimal(): +def make_latitude_optimal() -> Callable[ + [NumericArray, NumericArray, Dataset], dict[str, xr.DataArray] +]: """ Returns an optimal tilt angle for the given ``lat``, assuming that the panel is facing towards the equator, using a simple method from [1]. @@ -56,7 +69,9 @@ def make_latitude_optimal(): """ - def latitude_optimal(lon, lat, solar_position): + def latitude_optimal( + lon: NumericArray, lat: NumericArray, solar_position: Dataset + ) -> dict[str, xr.DataArray]: """ Build an orientation with latitude-dependent optimal tilt. 
@@ -74,28 +89,29 @@ def latitude_optimal(lon, lat, solar_position): dict Mapping with ``slope`` and ``azimuth``. """ - slope = np.empty_like(lat.values) + slope = np.empty_like(lat.values) # type: ignore[union-attr] - below_25 = np.abs(lat.values) <= np.radians(25) - below_50 = np.abs(lat.values) <= np.radians(50) + below_25 = np.abs(lat.values) <= np.radians(25) # type: ignore[union-attr] + below_50 = np.abs(lat.values) <= np.radians(50) # type: ignore[union-attr] - slope[below_25] = 0.87 * np.abs(lat.values[below_25]) + slope[below_25] = 0.87 * np.abs(lat.values[below_25]) # type: ignore[union-attr] slope[~below_25 & below_50] = 0.76 * np.abs( - lat.values[~below_25 & below_50] + lat.values[~below_25 & below_50] # type: ignore[union-attr] ) + np.radians(0.31) slope[~below_50] = np.radians(40.0) - # South orientation for panels on northern hemisphere and vice versa - azimuth = np.where(lat.values < 0, 0, pi) + azimuth = np.where(lat.values < 0, 0, pi) # type: ignore[union-attr] return dict( - slope=xr.DataArray(slope, coords=lat.coords), - azimuth=xr.DataArray(azimuth, coords=lat.coords), + slope=xr.DataArray(slope, coords=lat.coords), # type: ignore[union-attr] + azimuth=xr.DataArray(azimuth, coords=lat.coords), # type: ignore[union-attr] ) return latitude_optimal -def make_constant(slope, azimuth): +def make_constant( + slope: float, azimuth: float +) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: """ Create an orientation function with constant slope and azimuth. @@ -111,10 +127,12 @@ def make_constant(slope, azimuth): callable Orientation function returning constant ``slope`` and ``azimuth``. """ - slope = radians(slope) - azimuth = radians(azimuth) + slope_rad = radians(slope) + azimuth_rad = radians(azimuth) - def constant(lon, lat, solar_position): + def constant( + lon: NumericArray, lat: NumericArray, solar_position: Dataset + ) -> dict[str, NumericArray]: """ Return the configured constant panel orientation. 
@@ -132,12 +150,14 @@ def constant(lon, lat, solar_position): dict Mapping with constant ``slope`` and ``azimuth``. """ - return dict(slope=slope, azimuth=azimuth) + return dict(slope=slope_rad, azimuth=azimuth_rad) return constant -def make_latitude(azimuth=180): +def make_latitude( + azimuth: float = 180, +) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: """ Create an orientation function with slope equal to latitude. @@ -151,9 +171,11 @@ def make_latitude(azimuth=180): callable Orientation function using latitude as slope. """ - azimuth = radians(azimuth) + azimuth_rad = radians(azimuth) - def latitude(lon, lat, solar_position): + def latitude( + lon: NumericArray, lat: NumericArray, solar_position: Dataset + ) -> dict[str, NumericArray]: """ Return an orientation with slope equal to latitude. @@ -171,12 +193,19 @@ def latitude(lon, lat, solar_position): dict Mapping with latitude-based ``slope`` and constant ``azimuth``. """ - return dict(slope=lat, azimuth=azimuth) + return dict(slope=lat, azimuth=azimuth_rad) return latitude -def SurfaceOrientation(ds, solar_position, orientation, tracking=None): +def SurfaceOrientation( + ds: Dataset, + solar_position: Dataset, + orientation: Callable[ + [NumericArray, NumericArray, Dataset], dict[str, NumericArray] + ], + tracking: str | None = None, +) -> Dataset: """ Compute cos(incidence) for slope and panel azimuth. 
@@ -192,9 +221,9 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): lon = radians(ds["lon"]) lat = radians(ds["lat"]) - orientation = orientation(lon, lat, solar_position) - surface_slope = orientation["slope"] - surface_azimuth = orientation["azimuth"] + orientation_dict = orientation(lon, lat, solar_position) + surface_slope = orientation_dict["slope"] + surface_azimuth = orientation_dict["azimuth"] sun_altitude = solar_position["altitude"] sun_azimuth = solar_position["azimuth"] @@ -204,24 +233,19 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): surface_azimuth - sun_azimuth ) + cos(surface_slope) * sin(sun_altitude) - elif tracking == "horizontal": # horizontal tracking with horizontal axis - axis_azimuth = orientation[ - "azimuth" - ] # here orientation['azimuth'] refers to the azimuth of the tracker axis. + elif tracking == "horizontal": + axis_azimuth = orientation_dict["azimuth"] rotation = arctan( (cos(sun_altitude) / sin(sun_altitude)) * sin(sun_azimuth - axis_azimuth) ) surface_slope = abs(rotation) surface_azimuth = axis_azimuth + arcsin(sin(rotation) / sin(surface_slope)) - # the 2nd part yields +/-1 and determines if the panel is facing east or west cosincidence = cos(surface_slope) * sin(sun_altitude) + sin( surface_slope ) * cos(sun_altitude) * cos(sun_azimuth - surface_azimuth) - elif tracking == "tilted_horizontal": # horizontal tracking with tilted axis' - axis_tilt = orientation[ - "slope" - ] # here orientation['slope'] refers to the tilt of the tracker axis. 
+ elif tracking == "tilted_horizontal": + axis_tilt = orientation_dict["slope"] rotation = arctan( (cos(sun_altitude) * sin(sun_azimuth - surface_azimuth)) @@ -256,23 +280,21 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): + cos(axis_tilt) * sin(sun_altitude) ) + sin(rotation) * cos(sun_altitude) * sin(sun_azimuth - surface_azimuth) - elif tracking == "vertical": # vertical tracking, surface azimuth = sun_azimuth + elif tracking == "vertical": cosincidence = sin(surface_slope) * cos(sun_altitude) + cos( surface_slope ) * sin(sun_altitude) - elif tracking == "dual": # both vertical and horizontal tracking + elif tracking == "dual": cosincidence = np.float64(1.0) else: - assert False, ( + msg = ( "Values describing tracking system must be None for no tracking," + "'horizontal' for 1-axis horizontal tracking," - + "tilted_horizontal' for 1-axis horizontal tracking of tilted panle," - + "vertical' for 1-axis vertical tracking, or 'dual' for 2-axis tracking" + + "'tilted_horizontal' for 1-axis horizontal tracking of tilted panel," + + "'vertical' for 1-axis vertical tracking, or 'dual' for 2-axis tracking" ) + raise AssertionError(msg) - # fixup incidence angle: if the panel is badly oriented and the sun shines - # on the back of the panel (incidence angle > 90degree), the irradiation - # would be negative instead of 0; this is prevented here. 
cosincidence = cosincidence.clip(min=0) return xr.Dataset( diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index eea17406..7f8e82a9 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -2,14 +2,19 @@ # # SPDX-License-Identifier: MIT +from __future__ import annotations + +from typing import Any, Literal + import numpy as np +import xarray as xr -# Huld model was copied from gsee -- global solar energy estimator -# by Stefan Pfenninger -# https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py +from atlite._types import DataArray -def _power_huld(irradiance, t_amb, pc): +def _power_huld( + irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] +) -> DataArray: """ AC power per capacity predicted by Huld model, based on W/m2 irradiance. @@ -41,10 +46,12 @@ def _power_huld(irradiance, t_amb, pc): da.attrs["units"] = "kWh/kWp" da = da.rename("specific generation") - return da + return da # type: ignore[no-any-return] -def _power_bofinger(irradiance, t_amb, pc): +def _power_bofinger( + irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] +) -> DataArray: """ AC power per capacity predicted by bofinger model, based on W/m2 irradiance. @@ -71,10 +78,12 @@ def _power_bofinger(irradiance, t_amb, pc): capacity = (pc["A"] + pc["B"] * 1000.0 + pc["C"] * np.log(1000.0)) * 1e3 power = irradiance * eta * (pc.get("inverter_efficiency", 1.0) / capacity) power = power.where(irradiance >= pc["threshold"], 0) - return power.rename("AC power") + return power.rename("AC power") # type: ignore[no-any-return] -def SolarPanelModel(ds, irradiance, pc): +def SolarPanelModel( + ds: xr.Dataset, irradiance: DataArray, pc: dict[str, Any] +) -> DataArray: """ Compute PV power output for the selected panel model. @@ -92,11 +101,11 @@ def SolarPanelModel(ds, irradiance, pc): xarray.DataArray Specific PV power output. 
""" - model = pc.get("model", "huld") + model: Literal["huld", "bofinger"] = pc.get("model", "huld") if model == "huld": return _power_huld(irradiance, ds["temperature"], pc) elif model == "bofinger": return _power_bofinger(irradiance, ds["temperature"], pc) else: - AssertionError(f"Unknown panel model: {model}") + raise AssertionError(f"Unknown panel model: {model}") diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 494cd433..8fc82c3f 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +from __future__ import annotations + from warnings import warn import pandas as pd @@ -9,8 +11,10 @@ from dask.array import arccos, arcsin, arctan2, cos, radians, sin from numpy import pi +from atlite._types import Dataset + -def SolarPosition(ds, time_shift="0H"): +def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset: """ Compute solar azimuth and altitude. @@ -57,12 +61,15 @@ def SolarPosition(ds, time_shift="0H"): } if rvs.issubset(set(ds.data_vars)): - return ds[rvs].rename({v: v.replace("solar_", "") for v in rvs}) + return ds[rvs].rename({v: v.replace("solar_", "") for v in rvs}) # type: ignore[no-any-return] warn( - """The calculation method and handling of solar position variables will change. - The solar position will in the future be a permanent variables of a cutout. - Recreate your cutout to remove this warning and permanently include the solar position variables into your cutout.""", + ( + "The calculation method and handling of solar position variables will " + "change. The solar position will in the future be a permanent variable of " + "a cutout. Recreate your cutout to remove this warning and permanently " + "include the solar position variables into your cutout." 
+ ), DeprecationWarning, ) @@ -78,7 +85,7 @@ def SolarPosition(ds, time_shift="0H"): # Operations make new DataArray eager; reconvert to lazy dask arrays chunks = ds.chunksizes.get("time", "auto") if isinstance(chunks, tuple): - chunks = chunks[0] + chunks = chunks[0] # type: ignore[assignment] n = n.chunk(chunks) hour = hour.chunk(chunks) minute = minute.chunk(chunks) @@ -118,4 +125,4 @@ def SolarPosition(ds, time_shift="0H"): vars = {da.name: da for da in [alt, az]} solar_position = xr.Dataset(vars) - return solar_position + return solar_position # type: ignore[no-any-return] diff --git a/atlite/resource.py b/atlite/resource.py index 01dae541..d135bb07 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -13,7 +13,7 @@ import re from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd @@ -22,11 +22,11 @@ from dask.array import radians from scipy.signal import fftconvolve +from atlite._types import DataArray, NDArray, PathLike from atlite.utils import arrowdict logger = logging.getLogger(name=__name__) - RESOURCE_DIRECTORY = Path(__file__).parent / "resources" WINDTURBINE_DIRECTORY = RESOURCE_DIRECTORY / "windturbine" SOLARPANEL_DIRECTORY = RESOURCE_DIRECTORY / "solarpanel" @@ -38,17 +38,32 @@ from typing_extensions import NotRequired class TurbineConfig(TypedDict): - V: np.ndarray - POW: np.ndarray + V: NDArray + POW: NDArray P: float hub_height: float | int name: NotRequired[str] manufacturer: NotRequired[str] source: NotRequired[str] + class PanelConfig(TypedDict): + model: NotRequired[Literal["huld", "bofinger"]] + efficiency: NotRequired[float] + A: NotRequired[float] + B: NotRequired[float] + C: NotRequired[float] + name: NotRequired[str] + source: NotRequired[str] + + class CSPConfig(TypedDict): + efficiency: DataArray + path: PathLike + name: NotRequired[str] + source: NotRequired[str] + def get_windturbineconfig( - turbine: 
str | Path | dict, add_cutout_windspeed: bool = True + turbine: str | PathLike | dict[str, Any], add_cutout_windspeed: bool = True ) -> TurbineConfig: """ Load the wind 'turbine' configuration. @@ -79,15 +94,15 @@ def get_windturbineconfig( Config with details on the turbine """ - if not isinstance(turbine, (str | Path | dict)): + if not isinstance(turbine, (str, Path, dict)): raise KeyError( f"`turbine` must be a str, pathlib.Path or dict, but is {type(turbine)}." ) if isinstance(turbine, str) and turbine.startswith("oedb:"): - conf = get_oedb_windturbineconfig(turbine[len("oedb:") :]) + conf = cast(dict[str, Any], get_oedb_windturbineconfig(turbine[len("oedb:") :])) - elif isinstance(turbine, (str | Path)): + elif isinstance(turbine, (str, Path)): if isinstance(turbine, str): turbine_path = windturbines[turbine.replace(".yaml", "")] @@ -106,10 +121,12 @@ def get_windturbineconfig( elif isinstance(turbine, dict): conf = turbine - return _validate_turbine_config_dict(conf, add_cutout_windspeed) + return _validate_turbine_config_dict( + cast(dict[str, Any], conf), add_cutout_windspeed + ) -def get_solarpanelconfig(panel): +def get_solarpanelconfig(panel: str | PathLike) -> PanelConfig: """ Load the 'panel'.yaml file from local disk and provide a solar panel dict. @@ -127,7 +144,7 @@ def get_solarpanelconfig(panel): Config with details on the solarpanel """ - assert isinstance(panel, (str | Path)) + assert isinstance(panel, (str, Path)) if isinstance(panel, str): panel_path = solarpanels[panel.replace(".yaml", "")] @@ -136,12 +153,12 @@ def get_solarpanelconfig(panel): panel_path = panel with open(panel_path) as f: - conf = yaml.safe_load(f) + conf = cast(PanelConfig, yaml.safe_load(f)) return conf -def get_cspinstallationconfig(installation): +def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: """ Load the 'installation'.yaml file from local disk to provide the system efficiencies. 
@@ -160,7 +177,7 @@ def get_cspinstallationconfig(installation): Config with details on the CSP installation. """ - assert isinstance(installation, (str | Path)) + assert isinstance(installation, (str, Path)) if isinstance(installation, str): installation_path = cspinstallations[installation.replace(".yaml", "")] @@ -168,20 +185,14 @@ def get_cspinstallationconfig(installation): elif isinstance(installation, Path): installation_path = installation - # Load and set expected index columns with open(installation_path) as f: - config = yaml.safe_load(f) + config = cast(dict[str, Any], yaml.safe_load(f)) config["path"] = installation_path - ## Convert efficiency dict to xr.DataArray and convert units to deg -> rad, % -> p.u. da = pd.DataFrame(config["efficiency"]).set_index(["altitude", "azimuth"]) - # Handle as xarray DataArray early - da will be 'return'-ed da = da.to_xarray()["value"] - # Solar altitude + azimuth expected in deg for better readibility - # calculations use solar position in rad - # Convert da to new coordinates and drop old da = da.rename({"azimuth": "azimuth [deg]", "altitude": "altitude [deg]"}) da = da.assign_coords( { @@ -193,15 +204,14 @@ def get_cspinstallationconfig(installation): da = da.chunk("auto") - # Efficiency unit from % to p.u. da /= 1.0e2 config["efficiency"] = da - return config + return cast(CSPConfig, config) -def solarpanel_rated_capacity_per_unit(panel): +def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> float: """ Return the rated capacity per unit of a solar panel configuration. @@ -215,22 +225,22 @@ def solarpanel_rated_capacity_per_unit(panel): float Rated capacity per unit area or per panel, depending on the model. 
""" - # unit is m^2 here - - if isinstance(panel, (str | Path)): + if isinstance(panel, (str, Path)): panel = get_solarpanelconfig(panel) model = panel.get("model", "huld") if model == "huld": - return panel["efficiency"] + return cast(float, panel["efficiency"]) elif model == "bofinger": - # one unit in the capacity layout is interpreted as one panel of a - # capacity (A + 1000 * B + log(1000) * C) * 1000W/m^2 * (k / 1000) A, B, C = itemgetter("A", "B", "C")(panel) - return (A + B * 1000.0 + C * np.log(1000.0)) * 1e3 + return cast(float, (A + B * 1000.0 + C * np.log(1000.0)) * 1e3) + else: + raise ValueError(f"Unknown panel model: {model}") -def windturbine_rated_capacity_per_unit(turbine): +def windturbine_rated_capacity_per_unit( + turbine: str | PathLike | TurbineConfig, +) -> float: """ Return the rated capacity of a wind turbine configuration. @@ -244,13 +254,15 @@ def windturbine_rated_capacity_per_unit(turbine): float Rated turbine capacity. """ - if isinstance(turbine, (str | Path)): + if isinstance(turbine, (str, Path)): turbine = get_windturbineconfig(turbine) return turbine["P"] -def windturbine_smooth(turbine, params=None): +def windturbine_smooth( + turbine: TurbineConfig, params: dict[str, float] | None | bool = None +) -> TurbineConfig: """ Smooth the powercurve in `turbine` with a gaussian kernel. 
@@ -278,30 +290,25 @@ def windturbine_smooth(turbine, params=None): if params is None or params is True: params = {} - eta = params.get("eta", 0.95) - Delta_v = params.get("Delta_v", 1.27) - sigma = params.get("sigma", 2.29) + params = cast(dict[str, float], params) + eta: float = params.get("eta", 0.95) + Delta_v: float = params.get("Delta_v", 1.27) + sigma: float = params.get("sigma", 2.29) - def kernel(v_0): - # all velocities in m/s - return ( + def kernel(v_0: NDArray) -> NDArray: + return ( # type: ignore[no-any-return] 1.0 / np.sqrt(2 * np.pi * sigma * sigma) * np.exp(-(v_0 - Delta_v) * (v_0 - Delta_v) / (2 * sigma * sigma)) ) - def smooth(velocities, power): - # interpolate kernel and power curve to the same, regular velocity grid + def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: velocities_reg = np.linspace(-50.0, 50.0, 1001) power_reg = np.interp(velocities_reg, velocities, power) kernel_reg = kernel(velocities_reg) - # convolve power and kernel - # the downscaling is necessary because scipy expects the velocity - # increments to be 1., but here, they are 0.1 convolution = 0.1 * fftconvolve(power_reg, kernel_reg, mode="same") - # sample down so power curve doesn't get too long velocities_new = np.linspace(0.0, 35.0, 72) power_new = eta * np.interp(velocities_new, velocities_reg, convolution) @@ -309,7 +316,7 @@ def smooth(velocities, power): turbine = turbine.copy() turbine["V"], turbine["POW"] = smooth(turbine["V"], turbine["POW"]) - turbine["P"] = np.max(turbine["POW"]) + turbine["P"] = cast(float, float(np.max(turbine["POW"]))) if any(turbine["POW"][np.where(turbine["V"] == 0.0)] > 1e-2): logger.warning( @@ -323,12 +330,14 @@ def smooth(velocities, power): return turbine -def _max_v_is_zero_pow(turbine): - return np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0) +def _max_v_is_zero_pow(turbine: TurbineConfig) -> bool: + return cast( + bool, bool(np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0)) + 
) def _validate_turbine_config_dict( - turbine: dict, add_cutout_windspeed: bool + turbine: dict[str, Any], add_cutout_windspeed: bool ) -> TurbineConfig: """ Checks the turbine config dict format and power curve. @@ -356,11 +365,10 @@ def _validate_turbine_config_dict( ) raise ValueError(err_msg) - if not all(isinstance(turbine[p], (np.ndarray | list)) for p in ("POW", "V")): + if not all(isinstance(turbine[p], (np.ndarray, list)) for p in ("POW", "V")): err_msg = "turbine entries 'POW' and 'V' must be np.ndarray or list" raise ValueError(err_msg) - # convert lists from user provided turbine dicts to numpy arrays if any(isinstance(turbine[p], list) for p in ("POW", "V")): turbine["V"] = np.array(turbine["V"]) turbine["POW"] = np.array(turbine["POW"]) @@ -370,17 +378,15 @@ def _validate_turbine_config_dict( raise ValueError(err_msg) if not np.all(np.diff(turbine["V"]) >= 0): - # This check is not strict as it uses `>=` instead of `>` and thus allows equal - # wind speeds in the array. However, many power curves have two entries for the - # same wind speed at the cut-in and cut-out speeds which would make them fail if - # using `>` only. err_msg = ( "wind speed 'V' in the turbine config dict is expected to be increasing, " f"but is currently not in ascending order:\n{turbine['V']}" ) raise ValueError(err_msg) - if add_cutout_windspeed is True and not _max_v_is_zero_pow(turbine): + if add_cutout_windspeed is True and not _max_v_is_zero_pow( + cast(TurbineConfig, turbine) + ): turbine["V"] = np.pad(turbine["V"], (0, 1), "maximum") turbine["POW"] = np.pad(turbine["POW"], (0, 1), "constant", constant_values=0) logger.info( @@ -388,18 +394,18 @@ def _validate_turbine_config_dict( f"V={turbine['V'][-1]} m/s." ) - if not _max_v_is_zero_pow(turbine): + if not _max_v_is_zero_pow(cast(TurbineConfig, turbine)): logger.warning( "The power curve does not have a cut-out wind speed, i.e. the power" " output corresponding to the\nhighest wind speed is not zero. 
You can" " either change the power curve manually or set\n" "'add_cutout_windspeed=True' in the Cutout.wind conversion method." ) - return turbine + return cast(TurbineConfig, turbine) def get_oedb_windturbineconfig( - search: int | str | None = None, **search_params + search: int | str | None = None, **search_params: Any ) -> TurbineConfig: """ Download a windturbine configuration from the OEDB database. @@ -532,7 +538,7 @@ def get_oedb_windturbineconfig( name = "{manufacturer}_{name}".format(**turbineconf).translate(charmap) windturbines[name] = turbineconf - return turbineconf + return turbineconf # type: ignore[return-value] # Global caches diff --git a/atlite/utils.py b/atlite/utils.py index 86bd5bc3..da31eadb 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -5,10 +5,14 @@ General utility functions for internal use. """ +from __future__ import annotations + import logging import re import textwrap +from collections.abc import Callable from pathlib import Path +from typing import TYPE_CHECKING, Any, TypeAlias, cast import pandas as pd import xarray as xr @@ -16,10 +20,18 @@ from atlite.datasets import modules as datamodules from atlite.gis import maybe_swap_spatial_dims +if TYPE_CHECKING: + pass + logger = logging.getLogger(__name__) +PathLike: TypeAlias = str | Path +NDArray: TypeAlias = Any +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset + -def migrate_from_cutout_directory(old_cutout_dir, path): +def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> Dataset: """ Convert an old style cutout directory to new style netcdf file. 
""" @@ -61,7 +73,7 @@ def migrate_from_cutout_directory(old_cutout_dir, path): ) raise - data = maybe_swap_spatial_dims(data) + data = cast(Dataset, maybe_swap_spatial_dims(data)) # type: ignore[no-untyped-call] module = data.attrs["module"] data.attrs["prepared_features"] = list(datamodules[module].features) for v in data: @@ -79,18 +91,18 @@ def migrate_from_cutout_directory(old_cutout_dir, path): return data -def timeindex_from_slice(timeslice): +def timeindex_from_slice(timeslice: Any) -> pd.DatetimeIndex: end = pd.Timestamp(timeslice.end) + pd.offsets.DateOffset(months=1) return pd.date_range(timeslice.start, end, freq="1h", closed="left") -class arrowdict(dict): +class arrowdict(dict[str, Any]): """ A subclass of dict, which allows you to get items in the dict using the attribute syntax! """ - def __getattr__(self, item): + def __getattr__(self, item: str) -> Any: try: return self.__getitem__(item) except KeyError as e: @@ -98,8 +110,8 @@ def __getattr__(self, item): _re_pattern = re.compile("[a-zA-Z_][a-zA-Z0-9_]*") - def __dir__(self): - dict_keys = [] + def __dir__(self) -> list[str]: + dict_keys: list[str] = [] for k in self.keys(): if isinstance(k, str): m = self._re_pattern.match(k) @@ -117,22 +129,23 @@ class CachedAttribute: times. Sort of like memoization. """ - # For python 3.8 >= use functoolts.cached_property instead. 
+ method: Callable[[Any], Any] + name: str + __doc__: str | None - def __init__(self, method, name=None, doc=None): - # record the unbound-method and the name + def __init__( + self, + method: Callable[[Any], Any], + name: str | None = None, + doc: str | None = None, + ) -> None: self.method = method self.name = name or method.__name__ self.__doc__ = doc or method.__doc__ - def __get__(self, inst, cls): + def __get__(self, inst: Any, cls: type[Any] | None) -> Any: if inst is None: - # instance attribute accessed on class, return self - # You get here if you write `Foo.bar` return self - # compute, cache and return the instance's attribute value result = self.method(inst) - # setattr redefines the instance's attribute so this doesn't get called - # again setattr(inst, self.name, result) return result diff --git a/atlite/wind.py b/atlite/wind.py index 92dd1f19..75304840 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -9,24 +9,21 @@ import logging import re -from typing import TYPE_CHECKING +from typing import Literal, cast import numpy as np -import xarray as xr - -logger = logging.getLogger(__name__) +from atlite._types import DataArray, Dataset, NDArray -if TYPE_CHECKING: - from typing import Literal +logger = logging.getLogger(__name__) def extrapolate_wind_speed( - ds: xr.Dataset, + ds: Dataset, to_height: int | float, from_height: int | None = None, method: Literal["logarithmic", "power"] = "logarithmic", -) -> xr.DataArray: +) -> DataArray: """ Extrapolate the wind speed from a given height above ground to another. @@ -72,37 +69,37 @@ def extrapolate_wind_speed( Wind Resource Assessment: A Comparison against Tall Towers' https://doi.org/10.3390/en14144169 . 
""" - # Fast lane - to_name = f"wnd{int(to_height):0d}m" + to_name: str = f"wnd{int(to_height):0d}m" if to_name in ds: return ds[to_name] if from_height is None: - # Determine closest height to to_name - heights = np.asarray([int(s[3:-1]) for s in ds if re.match(r"wnd\d+m", s)]) + heights: NDArray = np.asarray( + [int(str(s)[3:-1]) for s in ds if re.match(r"wnd\d+m", str(s))] + ) if len(heights) == 0: raise AssertionError("Wind speed is not in dataset") - from_height = heights[np.argmin(np.abs(heights - to_height))] + from_height = int(heights[np.argmin(np.abs(heights - to_height))]) - from_name = f"wnd{int(from_height):0d}m" + from_name: str = f"wnd{int(from_height):0d}m" if method == "logarithmic": try: - roughness = ds["roughness"] + roughness: DataArray = ds["roughness"] except KeyError: raise RuntimeError( "The logarithmic interpolation method requires surface roughness (roughness);\n" "make sure you choose a compatible dataset like ERA5" ) - wnd_spd = ds[from_name] * ( + wnd_spd: DataArray = ds[from_name] * ( np.log(to_height / roughness) / np.log(from_height / roughness) ) - method_desc = "logarithmic method with roughness" + method_desc: str = "logarithmic method with roughness" elif method == "power": try: - wnd_shear_exp = ds["wnd_shear_exp"] + wnd_shear_exp: DataArray = ds["wnd_shear_exp"] except KeyError: raise RuntimeError( "The power law interpolation method requires a wind shear exponent (wnd_shear_exp);\n" @@ -125,4 +122,4 @@ def extrapolate_wind_speed( } ) - return wnd_spd.rename(to_name) + return cast(DataArray, wnd_spd.rename(to_name)) diff --git a/pyproject.toml b/pyproject.toml index d7d3c952..f901e88b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ Documentation = "https://atlite.readthedocs.io/en/latest/" [project.optional-dependencies] -dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff"] +dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff", "mypy", "types-PyYAML"] docs = [ "numpydoc==1.8.0", @@ 
-120,3 +120,12 @@ ignore = [ 'D417', # Missing argument descriptions in the docstring ] + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_configs = true +ignore_missing_imports = true +disallow_incomplete_defs = true +check_untyped_defs = true +exclude = ["test/", ".venv/", "build/"] From 9228ca7b92a876b3944b64355d4aba72db3b15a2 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 10:18:45 +0100 Subject: [PATCH 04/27] Enable ruff rules B, SIM, RET, C4, TC, NPY, G, PTH, RUF100 and fix all violations --- .pre-commit-config.yaml | 2 +- atlite/_types.py | 6 +- atlite/aggregate.py | 17 +- atlite/convert.py | 39 ++-- atlite/cutout.py | 32 ++-- atlite/data.py | 15 +- atlite/datasets/__init__.py | 5 +- atlite/datasets/cordex.py | 196 +++++++++++---------- atlite/datasets/era5.py | 61 +++---- atlite/datasets/gebco.py | 5 +- atlite/datasets/ncep.py | 143 +++++++-------- atlite/datasets/sarah.py | 39 ++-- atlite/gis.py | 81 +++++---- atlite/hydro.py | 8 +- atlite/pv/__init__.py | 5 - atlite/pv/irradiation.py | 25 +-- atlite/pv/orientation.py | 20 ++- atlite/pv/solar_panel_model.py | 17 +- atlite/pv/solar_position.py | 9 +- atlite/resource.py | 67 ++++--- atlite/utils.py | 13 +- atlite/wind.py | 11 +- doc/chart.py | 34 ++-- examples/historic-comparison-germany.ipynb | 33 ++-- examples/plotting_with_atlite.ipynb | 16 +- examples/solarpv_tracking_options.ipynb | 6 +- examples/working-with-csp.ipynb | 8 +- pyproject.toml | 26 ++- test/conftest.py | 9 +- test/test_aggregate_time.py | 4 +- test/test_creation.py | 6 +- test/test_gis.py | 9 +- test/test_preparation_and_conversion.py | 19 +- 33 files changed, 510 insertions(+), 476 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f3fda553..b44e22ba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: hooks: - id: mypy additional_dependencies: ['types-PyYAML', 'types-requests'] - exclude: ^(tests/|docs/|examples/) + exclude: 
^(tests?/|docs/|examples/) # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell diff --git a/atlite/_types.py b/atlite/_types.py index 995c1051..3338d665 100644 --- a/atlite/_types.py +++ b/atlite/_types.py @@ -4,9 +4,8 @@ from __future__ import annotations -from collections.abc import Callable, Sequence from pathlib import Path -from typing import Any, Literal, TypeAlias, TypedDict +from typing import TYPE_CHECKING, Any, Literal, TypeAlias, TypedDict import geopandas as gpd import numpy as np @@ -15,6 +14,9 @@ from pyproj import CRS from shapely.geometry.base import BaseGeometry +if TYPE_CHECKING: + from collections.abc import Callable, Sequence + NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] NDArrayInt: TypeAlias = np.ndarray[Any, np.dtype[np.signedinteger[Any]]] NDArrayBool: TypeAlias = np.ndarray[Any, np.dtype[np.bool_]] diff --git a/atlite/aggregate.py b/atlite/aggregate.py index c9da58f7..111127b9 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -10,14 +10,14 @@ from typing import TYPE_CHECKING, cast import dask -import pandas as pd import xarray as xr -from atlite._types import DataArray - if TYPE_CHECKING: + import pandas as pd from scipy.sparse import spmatrix + from atlite._types import DataArray + def aggregate_matrix( da: DataArray, @@ -45,7 +45,7 @@ def aggregate_matrix( index = index.rename("dim_0") if isinstance(da.data, dask.array.core.Array): da = da.stack(spatial=("y", "x")) - da = da.chunk(dict(spatial=-1)) + da = da.chunk({"spatial": -1}) result = xr.apply_ufunc( lambda da: da * matrix.T, da, @@ -53,9 +53,8 @@ def aggregate_matrix( output_core_dims=[[index.name]], dask="parallelized", output_dtypes=[da.dtype], - dask_gufunc_kwargs=dict(output_sizes={index.name: index.size}), + dask_gufunc_kwargs={"output_sizes": {index.name: index.size}}, ).assign_coords(**{index.name: index}) - return cast(DataArray, result) - else: - da = da.stack(spatial=("y", 
"x")).transpose("spatial", "time") - return xr.DataArray(matrix * da, [index, da.coords["time"]]) + return cast("DataArray", result) + da = da.stack(spatial=("y", "x")).transpose("spatial", "time") + return xr.DataArray(matrix * da, [index, da.coords["time"]]) diff --git a/atlite/convert.py b/atlite/convert.py index 3118a6e1..107fa58a 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -11,7 +11,6 @@ import logging import warnings from collections import namedtuple -from collections.abc import Callable from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any, Literal @@ -29,7 +28,6 @@ from atlite import csp as cspm from atlite import hydro as hydrom from atlite import wind as windm -from atlite._types import DataArray, Dataset, NumericArray from atlite.aggregate import aggregate_matrix from atlite.gis import spdiag from atlite.pv.irradiation import TiltedIrradiation @@ -46,6 +44,9 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: + from collections.abc import Callable + + from atlite._types import DataArray, Dataset, NumericArray from atlite.cutout import Cutout from atlite.resource import TurbineConfig @@ -64,7 +65,7 @@ def convert_and_aggregate( capacity_factor: bool = False, capacity_factor_timeseries: bool = False, show_progress: bool = False, - dask_kwargs: dict[str, Any] = {}, + dask_kwargs: dict[str, Any] | None = None, **convert_kwds: Any, ) -> Any: """ @@ -150,6 +151,8 @@ def convert_and_aggregate( pv : Generate solar PV generation time-series. 
""" + if dask_kwargs is None: + dask_kwargs = {} if ( aggregate_time is not None and aggregate_time is not False @@ -186,7 +189,7 @@ def convert_and_aggregate( aggregate_time = False func_name = convert_func.__name__.replace("convert_", "") - logger.info(f"Convert and aggregate '{func_name}'.") + logger.info("Convert and aggregate '%s'.", func_name) da = convert_func(cutout.data, **convert_kwds) no_args = all(v is None for v in [layout, shapes, matrix]) @@ -270,8 +273,7 @@ def convert_and_aggregate( if return_capacity: return maybe_progressbar(results, show_progress, **dask_kwargs), capacity - else: - return maybe_progressbar(results, show_progress, **dask_kwargs) + return maybe_progressbar(results, show_progress, **dask_kwargs) def maybe_progressbar( @@ -744,7 +746,7 @@ def convert_solar_thermal( def solar_thermal( cutout: Cutout, - orientation: dict[str, float] = {"slope": 45.0, "azimuth": 180.0}, + orientation: dict[str, float] | None = None, trigon_model: str = "simple", clearsky_model: str = "simple", c0: float = 0.8, @@ -785,6 +787,8 @@ def solar_thermal( (2014) 1003-1018 """ + if orientation is None: + orientation = {"slope": 45.0, "azimuth": 180.0} if not callable(orientation): orientation = get_orientation(orientation) # type: ignore[assignment] @@ -842,8 +846,7 @@ def apply_power_curve(da): ) da.attrs["units"] = "MWh/MWp" - da = da.rename("specific generation") - return da # type: ignore[no-any-return] + return da.rename("specific generation") # type: ignore[no-any-return] def wind( @@ -964,7 +967,7 @@ def convert_irradiation( """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) - irradiation = TiltedIrradiation( + return TiltedIrradiation( ds, solar_position, surface_orientation, @@ -973,7 +976,6 @@ def convert_irradiation( tracking=tracking, irradiation=irradiation, ) - return irradiation def irradiation( @@ -1082,8 +1084,7 @@ def convert_pv( clearsky_model=clearsky_model, 
tracking=tracking, ) - solar_panel = SolarPanelModel(ds, irradiation, panel) - return solar_panel + return SolarPanelModel(ds, irradiation, panel) def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params): @@ -1213,9 +1214,7 @@ def convert_csp(ds, installation): da = da.fillna(0.0) da.attrs["units"] = "kWh/kW_ref" - da = da.rename("specific generation") - - return da + return da.rename("specific generation") def csp(cutout, installation, technology=None, **params): @@ -1342,7 +1341,7 @@ def runoff( if smooth is not None: if smooth is True: smooth = 24 * 7 - if "return_capacity" in params.keys(): + if "return_capacity" in params: result = result[0].rolling(time=smooth, min_periods=1).mean(), result[1] else: result = result.rolling(time=smooth, min_periods=1).mean() @@ -1438,7 +1437,7 @@ def hydro( # The hydrological parameters are in units of "m of water per day" and so # they should be multiplied by 1000 and the basin area to convert to m3 # d-1 = m3 h-1 / 24 - runoff *= xr.DataArray(basins.shapes.to_crs(dict(proj="cea")).area) + runoff *= xr.DataArray(basins.shapes.to_crs({"proj": "cea"}).area) return hydrom.shift_and_aggregate_runoff_for_plants( basins, runoff, flowspeed, show_progress @@ -1535,7 +1534,7 @@ def convert_line_rating( def line_rating( - cutout, shapes, line_resistance, show_progress=False, dask_kwargs={}, **params + cutout, shapes, line_resistance, show_progress=False, dask_kwargs=None, **params ): """ Create a dynamic line rating time series based on the IEEE-738 standard. 
@@ -1604,6 +1603,8 @@ def line_rating( >>> s = np.sqrt(3) * i * v / 1e3 # in MW """ + if dask_kwargs is None: + dask_kwargs = {} if not isinstance(shapes, gpd.GeoSeries): shapes = gpd.GeoSeries(shapes).rename_axis("dim_0") diff --git a/atlite/cutout.py b/atlite/cutout.py index e06c50e8..6f25deb2 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -17,7 +17,6 @@ from __future__ import annotations import logging -from collections.abc import Sequence from pathlib import Path from tempfile import mktemp from typing import TYPE_CHECKING, Any @@ -32,18 +31,20 @@ from pyproj import CRS from shapely.geometry import box -from atlite._types import ( - CrsLike, - DataArray, - GeoDataFrame, - Geometry, - NDArray, - Number, - PathLike, - SparseMatrix, -) - if TYPE_CHECKING: + from collections.abc import Sequence + + from atlite._types import ( + CrsLike, + DataArray, + GeoDataFrame, + Geometry, + NDArray, + Number, + PathLike, + SparseMatrix, + ) + pass from atlite.convert import ( @@ -173,12 +174,13 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: if cutoutparams: warn( f"Arguments {', '.join(cutoutparams)} are ignored, since " - "cutout is already built." 
+ "cutout is already built.", + stacklevel=2, ) elif "data" in cutoutparams: data = cutoutparams.pop("data") else: - logger.info(f"Building new cutout {path}") + logger.info("Building new cutout %s", path) if "bounds" in cutoutparams: bounds = cutoutparams.pop("bounds") @@ -220,7 +222,7 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: # Check compatibility of CRS modules = atleast_1d(data.attrs.get("module")) # type: ignore[arg-type] - crs = set(CRS(datamodules[m].crs) for m in modules) + crs = {CRS(datamodules[m].crs) for m in modules} assert len(crs) == 1, f"CRS of {module} not compatible" self.path = path diff --git a/atlite/data.py b/atlite/data.py index caf7538f..3f218bfe 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -9,7 +9,6 @@ import logging import os -from collections.abc import Callable, Iterable from functools import wraps from pathlib import Path from shutil import rmtree @@ -25,12 +24,12 @@ from dask.utils import SerializableLock from numpy import atleast_1d -from atlite._types import DataArray, Dataset, PathLike from atlite.datasets import modules as datamodules if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Callable, Iterable, Sequence + from atlite._types import DataArray, Dataset, PathLike from atlite.cutout import Cutout logger = logging.getLogger(__name__) @@ -172,7 +171,7 @@ def maybe_remove_tmpdir( @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> Any: - if kwargs.get("tmpdir", None): + if kwargs.get("tmpdir"): res: Any = func(*args, **kwargs) else: kwargs["tmpdir"] = mkdtemp() @@ -270,7 +269,7 @@ def cutout_prepare( temp_dir_path: Path = Path(tmpdir) if not temp_dir_path.is_dir(): raise FileNotFoundError(f"The tmpdir: {temp_dir_path} does not exist.") - logger.info(f"Storing temporary files in {tmpdir}") + logger.info("Storing temporary files in %s", tmpdir) modules_array: np.ndarray[Any, np.dtype[Any]] = atleast_1d(cutout.module) modules_list: list[str] = 
modules_array.tolist() @@ -289,7 +288,7 @@ def cutout_prepare( missing_vars = missing_vars[lambda v: ~v.isin(cutout.data)] if missing_vars.empty: continue - logger.info(f"Calculating and writing with module {module}:") + logger.info("Calculating and writing with module %s:", module) missing_features: np.ndarray[Any, np.dtype[Any]] = missing_vars.index.unique( "feature" ) @@ -304,7 +303,7 @@ def cutout_prepare( ) prepared |= set(missing_features) - cutout.data.attrs.update(dict(prepared_features=list(prepared))) + cutout.data.attrs.update({"prepared_features": list(prepared)}) attrs: dict[str, Any] = non_bool_dict(cutout.data.attrs) attrs.update(ds.attrs) @@ -332,7 +331,7 @@ def cutout_prepare( if cutout.path.exists(): cutout.data.close() cutout.path.unlink() - os.rename(tmp, cutout.path) + Path(tmp).rename(cutout.path) cutout.data = xr.open_dataset(cutout.path, chunks=cutout.chunks) diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py index 39e11a03..bb006105 100644 --- a/atlite/datasets/__init__.py +++ b/atlite/datasets/__init__.py @@ -8,8 +8,11 @@ from __future__ import annotations -from types import ModuleType +from typing import TYPE_CHECKING from atlite.datasets import era5, gebco, sarah +if TYPE_CHECKING: + from types import ModuleType + modules: dict[str, ModuleType] = {"era5": era5, "sarah": sarah, "gebco": gebco} diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index f759ccef..50b92720 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -16,16 +16,19 @@ import glob import os -from collections.abc import Generator from itertools import groupby from operator import itemgetter -from typing import Any +from typing import TYPE_CHECKING, Any -import numpy as np import pandas as pd import xarray as xr -from atlite._types import PathLike +if TYPE_CHECKING: + from collections.abc import Generator + + import numpy as np + + from atlite._types import PathLike # Model and CRS Settings model = "MPI-M-MPI-ESM-LR" 
@@ -38,10 +41,9 @@ def rename_and_clean_coords(ds: xr.Dataset) -> xr.Dataset: ds = ds.rename({"rlon": "x", "rlat": "y"}) - ds = ds.drop( + return ds.drop( # type: ignore[no-any-return] (set(ds.coords) | set(ds.data_vars)) & {"bnds", "height", "rotated_pole"} ) - return ds def prepare_data_cordex( @@ -115,7 +117,7 @@ def prepare_meta_cordex( module: Any, model: str = "MPI-M-MPI-ESM-LR", ) -> xr.Dataset: - fn = next(glob.iglob(template.format(year=year, model=model))) + fn = next(glob.iglob(template.format(year=year, model=model))) # noqa: PTH207 with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.coords.to_dataset() @@ -157,150 +159,150 @@ def tasks_yearly_cordex( ys = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) return [ - dict( - prepare_func=prepare_func, - xs=xs, - ys=ys, - oldname=oldname, - newname=newname, - fn=next(glob.iglob(template.format(year=year, model=model))), - year=year, - months=list(map(itemgetter(1), yearmonths)), - ) + { + "prepare_func": prepare_func, + "xs": xs, + "ys": ys, + "oldname": oldname, + "newname": newname, + "fn": next(glob.iglob(template.format(year=year, model=model))), # noqa: PTH207 + "year": year, + "months": list(map(itemgetter(1), yearmonths)), + } for year, yearmonths in groupby(yearmonths, itemgetter(0)) ] weather_data_config: dict[str, dict[str, Any]] = {} try: - from atlite import config # type: ignore[attr-defined] # noqa: F401 + from atlite import config # type: ignore[attr-defined] weather_data_config = { - "influx": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsds", - newname="influx", - template=os.path.join( - config.cordex_dir, # noqa: F821 + "influx": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsds", + "newname": "influx", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "influx", "rsds_*_{year}*.nc", ), - ), - "outflux": dict( - 
tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsus", - newname="outflux", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "outflux": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsus", + "newname": "outflux", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "outflux", "rsus_*_{year}*.nc", ), - ), - "temperature": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="tas", - newname="temperature", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "temperature": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "tas", + "newname": "temperature", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc", ), - ), - "humidity": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="hurs", - newname="humidity", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "humidity": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "hurs", + "newname": "humidity", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "humidity", "hurs_*_{year}*.nc", ), - ), - "wnd10m": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="sfcWind", - newname="wnd10m", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "wnd10m": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "sfcWind", + "newname": "wnd10m", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "wind", "sfcWind_*_{year}*.nc", ), - ), - "roughness": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="rlst", - newname="roughness", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "roughness": { + 
"tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "rlst", + "newname": "roughness", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "roughness", "rlst_*.nc", ), - ), - "runoff": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="mrro", - newname="runoff", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "runoff": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "mrro", + "newname": "runoff", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "runoff", "mrro_*_{year}*.nc", ), - ), - "height": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="orog", - newname="height", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "height": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "orog", + "newname": "height", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "altitude", "orog_*.nc", ), - ), - "CWT": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_weather_types_cordex, - oldname="CWT", - newname="CWT", - template=os.path.join( - config.cordex_dir, # noqa: F821 + }, + "CWT": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_weather_types_cordex, + "oldname": "CWT", + "newname": "CWT", + "template": os.path.join( # noqa: PTH118 + config.cordex_dir, "{model}", "weather_types", "CWT_*_{year}*.nc", ), - ), + }, } except ImportError: pass meta_data_config: dict[str, Any] = {} try: - from atlite import config # type: ignore[attr-defined] # noqa: F401 + from atlite import config # type: ignore[attr-defined] - meta_data_config = dict( - prepare_func=prepare_meta_cordex, - template=os.path.join( - config.cordex_dir, # noqa: F821 + meta_data_config = { + "prepare_func": prepare_meta_cordex, + "template": os.path.join( # noqa: PTH118 + 
config.cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc", ), - height_config=weather_data_config["height"], - ) + "height_config": weather_data_config["height"], + } except (ImportError, KeyError): pass diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 4dde6800..ccbfc0fd 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -14,9 +14,9 @@ import os import warnings import weakref -from collections.abc import Callable +from pathlib import Path from tempfile import mkstemp -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import cdsapi import numpy as np @@ -24,13 +24,18 @@ import xarray as xr from dask import compute, delayed from dask.array import arctan2, sqrt -from dask.utils import SerializableLock from numpy import atleast_1d -from atlite._types import ERA5RetrievalParams, PathLike from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from collections.abc import Callable + + from dask.utils import SerializableLock + + from atlite._types import ERA5RetrievalParams, PathLike + # Null context for running a with statements wihout any context try: from contextlib import nullcontext @@ -72,8 +77,7 @@ def _add_height(ds: xr.Dataset) -> xr.Dataset: if "time" in z.coords: z = z.isel(time=0, drop=True) ds["height"] = z / g0 - ds = ds.drop_vars("z") - return ds + return ds.drop_vars("z") # type: ignore[no-any-return] def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dataset: @@ -84,8 +88,7 @@ def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dat ds = maybe_swap_spatial_dims(ds) # type: ignore[assignment] if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - ds = ds.drop_vars(["expver", "number"], errors="ignore") - return ds # type: ignore[return-value] + return ds.drop_vars(["expver", "number"], errors="ignore") # type: ignore[no-any-return] def 
get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: @@ -113,9 +116,7 @@ def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"]) - ds = ds.rename({"fsr": "roughness"}) - - return ds # type: ignore[no-any-return] + return ds.rename({"fsr": "roughness"}) # type: ignore[no-any-return] def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: @@ -157,9 +158,7 @@ def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: sp = SolarPosition(ds, time_shift=time_shift) sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - ds = xr.merge([ds, sp]) - - return ds # type: ignore[no-any-return] + return xr.merge([ds, sp]) # type: ignore[no-any-return] def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: @@ -179,7 +178,7 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ) ds = _rename_and_clean_coords(ds) - ds = ds.rename( + return ds.rename( # type: ignore[no-any-return] { "t2m": "temperature", "stl4": "soil temperature", @@ -187,16 +186,12 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: } ) - return ds # type: ignore[no-any-return] - def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = ds.rename({"ro": "runoff"}) - - return ds # type: ignore[no-any-return] + return ds.rename({"ro": "runoff"}) # type: ignore[no-any-return] def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: @@ -208,9 +203,7 @@ def get_data_height(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data(variable="geopotential", **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = _add_height(ds) - - return ds + return _add_height(ds) def _area(coords: dict[str, xr.DataArray]) -> list[float]: @@ -257,20 +250,24 @@ def retrieval_times( def 
noisy_unlink(path: PathLike) -> None: - logger.debug(f"Deleting file {path}") + logger.debug("Deleting file %s", path) try: - os.unlink(path) + Path(path).unlink() except PermissionError: - logger.error(f"Unable to delete file {path}, as it is still in use.") + logger.error("Unable to delete file %s, as it is still in use.", path) def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: - logger.debug(f"Adding finalizer for {target}") + logger.debug("Adding finalizer for %s", target) weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) # type: ignore[union-attr] def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: - dim_mapping = dict(time="valid_time", x="longitude", y="latitude") | dim_mapping + dim_mapping = { + "time": "valid_time", + "x": "longitude", + "y": "latitude", + } | dim_mapping if not isinstance(chunks, dict): return chunks @@ -347,10 +344,10 @@ def retrieve_data( "Need to specify at least 'variable', 'year' and 'month'" ) - logger.debug(f"Requesting {product} with API request: {request}") + logger.debug("Requesting %s with API request: %s", product, request) client = cdsapi.Client( - info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level + info_callback=logger.debug, debug=logging.root.level <= logging.DEBUG ) result = client.retrieve(product, request) @@ -365,7 +362,7 @@ def retrieve_data( timestr = f"{request['year']}-{request['month']}" variables = atleast_1d(request["variable"]) varstr = "\n\t".join([f"{v} ({timestr})" for v in variables]) - logger.info(f"CDS: Downloading variables\n\t{varstr}\n") + logger.info("CDS: Downloading variables\n\t%s\n", varstr) result.download(target) if request["data_format"] == "grib": @@ -409,7 +406,7 @@ def get_data( f"sanitize_{feature}" ) - logger.info(f"Requesting data for feature {feature}...") + logger.info("Requesting data for feature %s...", feature) def retrieve_once(time: dict[str, Any]) -> xr.Dataset: ds = func({**retrieval_params, **time}) # type: ignore[misc, 
typeddict-item] diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 154ef62d..25f4eb66 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -10,14 +10,15 @@ from __future__ import annotations import logging -from typing import Any +from typing import TYPE_CHECKING, Any import rasterio as rio import xarray as xr from pandas import to_numeric from rasterio.warp import Resampling -from atlite._types import PathLike +if TYPE_CHECKING: + from atlite._types import PathLike logger = logging.getLogger(__name__) diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 8793ce4e..914da2c2 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -16,14 +16,16 @@ import glob import os -from collections.abc import Generator -from typing import Any +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd import xarray as xr -from atlite._types import PathLike +if TYPE_CHECKING: + from collections.abc import Generator + + from atlite._types import PathLike engine: str = "pynio" crs: int = 4326 @@ -57,8 +59,7 @@ def convert_lons_lats_ncep( ds = ds.sel(lon_0=xs) ds = ds.rename({"lon_0": "x", "lat_0": "y"}) - ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - return ds + return ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) # type: ignore[no-any-return] def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr.Dataset: @@ -247,7 +248,7 @@ def prepare_meta_ncep( module: Any, engine: str = engine, ) -> xr.Dataset: - fn = next(glob.iglob(template.format(year=year, month=month))) + fn = next(glob.iglob(template.format(year=year, month=month))) # noqa: PTH207 with xr.open_dataset(fn, engine=engine) as ds: ds = ds.coords.to_dataset() ds = convert_lons_lats_ncep(ds, xs, ys) @@ -279,14 +280,14 @@ def tasks_monthly_ncep( meta_attrs: dict[str, Any], ) -> list[dict[str, Any]]: return [ - dict( - prepare_func=prepare_func, - xs=xs, - ys=ys, - 
fn=next(glob.iglob(template.format(year=ym[0], month=ym[1]))), - engine=engine, - yearmonth=ym, - ) + { + "prepare_func": prepare_func, + "xs": xs, + "ys": ys, + "fn": next(glob.iglob(template.format(year=ym[0], month=ym[1]))), # noqa: PTH207 + "engine": engine, + "yearmonth": ym, + } for ym in yearmonths ] @@ -306,7 +307,7 @@ def tasks_height_ncep( xs=xs, ys=ys, yearmonths=yearmonths, - fn=next(glob.iglob(template)), + fn=next(glob.iglob(template)), # noqa: PTH207 **extra_args, ) ] @@ -314,88 +315,88 @@ def tasks_height_ncep( weather_data_config: dict[str, dict[str, Any]] = {} try: - from atlite import config # type: ignore[attr-defined] # noqa: F401 + from atlite import config # type: ignore[attr-defined] weather_data_config = { - "influx": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_influx_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + "influx": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_influx_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/dswsfc.*.grb2", ), - ), - "outflux": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_outflux_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "outflux": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_outflux_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/uswsfc.*.grb2", ), - ), - "temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "temperature": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_temperature_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2", ), - ), - "soil temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_soil_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "soil temperature": { + "tasks_func": 
tasks_monthly_ncep, + "prepare_func": prepare_soil_temperature_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/soilt1.*.grb2", ), - ), - "wnd10m": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_wnd10m_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "wnd10m": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_wnd10m_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/wnd10m.*.grb2", ), - ), - "runoff": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_runoff_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "runoff": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_runoff_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/runoff.*.grb2", ), - ), - "roughness": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_roughness_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "roughness": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_roughness_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "{year}{month:0>2}/flxf.gdas.*.grb2", ), - ), - "height": dict( - tasks_func=tasks_height_ncep, - prepare_func=prepare_height_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + }, + "height": { + "tasks_func": tasks_height_ncep, + "prepare_func": prepare_height_ncep, + "template": os.path.join( # noqa: PTH118 + config.ncep_dir, "height/cdas1.20130101.splgrbanl.grb2", ), - ), + }, } except ImportError: pass meta_data_config: dict[str, Any] = {} try: - from atlite import config # type: ignore[attr-defined] # noqa: F401 + from atlite import config # type: ignore[attr-defined] - meta_data_config = dict( - prepare_func=prepare_meta_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 + meta_data_config = { + "prepare_func": prepare_meta_ncep, + "template": os.path.join( # noqa: PTH118 + 
config.ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2", ), - height_config=weather_data_config["height"], - ) + "height_config": weather_data_config["height"], + } except (ImportError, KeyError): pass diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index 14376b87..4d1ba1ab 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -8,22 +8,23 @@ from __future__ import annotations -import glob import logging -import os import warnings from functools import partial -from typing import Any +from pathlib import Path +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd import xarray as xr from rasterio.warp import Resampling -from atlite._types import PathLike from atlite.gis import regrid from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from atlite._types import PathLike + logger = logging.getLogger(__name__) @@ -45,8 +46,8 @@ def get_filenames(sarah_dir: str | PathLike, coords: dict[str, Any]) -> pd.DataFrame: def _filenames_starting_with(name: str) -> pd.Series[str]: - pattern = os.path.join(sarah_dir, "**", f"{name}*.nc") - files = pd.Series(glob.glob(pattern, recursive=True)) + pattern = str(Path(sarah_dir) / "**" / f"{name}*.nc") + files = pd.Series([str(f) for f in Path(sarah_dir).rglob(f"{name}*.nc")]) assert not files.empty, ( f"No files found at {pattern}. Make sure " f"sarah_dir points to the correct directory!" 
@@ -56,7 +57,10 @@ def _filenames_starting_with(name: str) -> pd.Series[str]: return files.sort_index() files = pd.concat( - dict(sis=_filenames_starting_with("SIS"), sid=_filenames_starting_with("SID")), + { + "sis": _filenames_starting_with("SIS"), + "sid": _filenames_starting_with("SID"), + }, join="inner", axis=1, ) @@ -66,8 +70,10 @@ def _filenames_starting_with(name: str) -> pd.Series[str]: if (start < files.index[0]) or (end > files.index[-1]): logger.error( - f"Files in {sarah_dir} do not cover the whole time span:" - f"\t{start} until {end}" + "Files in %s do not cover the whole time span:\t%s until %s", + sarah_dir, + start, + end, ) return files.loc[(files.index >= start) & (files.index <= end)].sort_index() @@ -147,7 +153,7 @@ def get_data( creation_parameters.setdefault("sarah_interpolate", True) files = get_filenames(sarah_dir, coords) - open_kwargs = dict(chunks=chunks, parallel=creation_parameters["parallel"]) + open_kwargs = {"chunks": chunks, "parallel": creation_parameters["parallel"]} ds_sis = xr.open_mfdataset(files.sis, combine="by_coords", **open_kwargs)[["SIS"]] ds_sid = xr.open_mfdataset(files.sid, combine="by_coords", **open_kwargs)[["SID"]] ds = xr.merge([ds_sis, ds_sid]) @@ -156,18 +162,15 @@ def get_data( lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - if creation_parameters["sarah_interpolate"]: - ds = interpolate(ds) - else: - ds = ds.fillna(0) + ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) if cutout.dt not in ["30min", "30T"]: - ds = hourly_mean(ds) + ds = hourly_mean(ds) # type: ignore[arg-type] if (cutout.dx != dx) or (cutout.dy != dy): ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) - dif_attrs = dict(long_name="Surface Diffuse Shortwave Flux", units="W m-2") + dif_attrs = {"long_name": "Surface Diffuse Shortwave Flux", "units": "W m-2"} ds["influx_diffuse"] = (ds["SIS"] - ds["SID"]).assign_attrs(**dif_attrs) ds = ds.rename({"SID": 
"influx_direct"}).drop_vars("SIS") ds = ds.assign_coords(x=ds.coords["lon"], y=ds.coords["lat"]) @@ -179,6 +182,4 @@ def get_data( sp = SolarPosition(ds, time_shift="0H") sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - ds = xr.merge([ds, sp]) - - return ds # type: ignore[no-any-return] + return xr.merge([ds, sp]) # type: ignore[no-any-return] diff --git a/atlite/gis.py b/atlite/gis.py index 9092d983..a2cdb06c 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -10,7 +10,6 @@ import logging import multiprocessing as mp from collections import OrderedDict -from collections.abc import Callable, Iterable, Sequence from pathlib import Path from typing import TYPE_CHECKING, Any, cast from warnings import catch_warnings, simplefilter @@ -34,20 +33,22 @@ from shapely.strtree import STRtree from tqdm import tqdm -from atlite._types import ( - CrsLike, - DataArray, - Dataset, - GeoDataFrame, - Geometry, - GeoSeries, - NDArray, - PathLike, -) - if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + from matplotlib.axes import Axes + from atlite._types import ( + CrsLike, + DataArray, + Dataset, + GeoDataFrame, + Geometry, + GeoSeries, + NDArray, + PathLike, + ) + logger = logging.getLogger(__name__) @@ -97,7 +98,7 @@ def get_coords( ) ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.sel(x=x, y=y, time=time) - return cast(Dataset, ds) + return cast("Dataset", ds) def spdiag(v: NDArray | Sequence[float]) -> sp.sparse.csr_matrix: @@ -148,10 +149,9 @@ def _reproject_shape(shape: Geometry) -> Geometry: if isinstance(shapes, pd.Series): return shapes.map(_reproject_shape) - elif isinstance(shapes, dict): + if isinstance(shapes, dict): return OrderedDict((k, _reproject_shape(v)) for k, v in shapes.items()) - else: - return list(map(_reproject_shape, shapes)) + return list(map(_reproject_shape, shapes)) def compute_indicatormatrix( @@ -194,7 +194,7 @@ def compute_indicatormatrix( ) indicator = sp.sparse.lil_matrix((len(dest_list), 
len(orig_list)), dtype=float) tree = STRtree(orig_list) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig_list)) + idx = {hash(o.wkt): i for i, o in enumerate(orig_list)} for i, d in enumerate(dest_list): for o in tree.query(d): @@ -245,7 +245,7 @@ def compute_intersectionmatrix( ) intersection = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) tree = STRtree(orig_list) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig_list)) + idx = {hash(o.wkt): i for i, o in enumerate(orig_list)} for i, d in enumerate(dest_list): for o in tree.query(d): @@ -573,15 +573,15 @@ def add_raster( CRS of the raster. Specify this if the raster has invalid crs. """ - d: dict[str, Any] = dict( - raster=raster, - codes=codes, - buffer=buffer, - invert=invert, - nodata=nodata, - allow_no_overlap=allow_no_overlap, - crs=crs, - ) + d: dict[str, Any] = { + "raster": raster, + "codes": codes, + "buffer": buffer, + "invert": invert, + "nodata": nodata, + "allow_no_overlap": allow_no_overlap, + "crs": crs, + } self.rasters.append(d) def add_geometry( @@ -605,7 +605,7 @@ def add_geometry( of the geometries. The default is False. 
""" - d: dict[str, Any] = dict(geometry=geometry, buffer=buffer, invert=invert) + d: dict[str, Any] = {"geometry": geometry, "buffer": buffer, "invert": invert} self.geometries.append(d) def open_files(self) -> None: @@ -724,8 +724,7 @@ def compute_shape_availability( return shape_availability_reprojected( geometry, self, dst_transform, dst_crs, dst_shape ) - else: - return shape_availability(geometry, self) + return shape_availability(geometry, self) def plot_shape_availability( self, @@ -889,12 +888,12 @@ def compute_availabilitymatrix( shapes = shapes.to_crs(excluder.crs) args = (excluder, cutout.transform_r, cutout.crs, cutout.shape) - tqdm_kwargs = dict( - ascii=False, - unit=" gridcells", - total=len(shapes), - desc="Compute availability matrix", - ) + tqdm_kwargs = { + "ascii": False, + "unit": " gridcells", + "total": len(shapes), + "desc": "Compute availability matrix", + } if nprocesses is None: if not disable_progressbar: @@ -1028,14 +1027,14 @@ def _reproject(src: NDArray, **kwargs: Any) -> NDArray: if reprojected.ndim != src.ndim: reprojected = reprojected.squeeze(axis=0) - return cast(NDArray, reprojected) + return cast("NDArray", reprojected) data_vars = ds.data_vars.values() if isinstance(ds, xr.Dataset) else (ds,) dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "regrid can only reproject datasets with homogeneous dtype" return cast( - Dataset | DataArray, + "Dataset | DataArray", ( xr.apply_ufunc( _reproject, @@ -1043,9 +1042,9 @@ def _reproject(src: NDArray, **kwargs: Any) -> NDArray: input_core_dims=[[namey, namex]], output_core_dims=[["yout", "xout"]], output_dtypes=[dtypes.pop()], - dask_gufunc_kwargs=dict( - output_sizes={"yout": dst_shape[0], "xout": dst_shape[1]} - ), + dask_gufunc_kwargs={ + "output_sizes": {"yout": dst_shape[0], "xout": dst_shape[1]} + }, dask="parallelized", kwargs=kwargs, ) diff --git a/atlite/hydro.py b/atlite/hydro.py index 5fe838d6..e1f9818e 100644 --- a/atlite/hydro.py +++ b/atlite/hydro.py @@ -47,8 
+47,10 @@ def find_basin( hids = shapes.index[shapes.intersects(Point(lon, lat))] if len(hids) > 1: logger.warning( - f"The point ({lon}, {lat}) is in several basins: {hids}. " - "Assuming the first one." + "The point (%s, %s) is in several basins: %s. Assuming the first one.", + lon, + lat, + hids, ) return int(hids[0]) @@ -175,7 +177,7 @@ def shift_and_aggregate_runoff_for_plants( disable=not show_progress, desc="Shift and aggregate runoff by plant", ): - inflow_plant: xr.DataArray = inflow.loc[dict(plant=ppl.Index)] + inflow_plant: xr.DataArray = inflow.loc[{"plant": ppl.Index}] distances: pd.Series = ( basins.meta.loc[ppl.upstream, "DIST_MAIN"] - basins.meta.at[ppl.hid, "DIST_MAIN"] diff --git a/atlite/pv/__init__.py b/atlite/pv/__init__.py index a533f7f9..e05416cb 100644 --- a/atlite/pv/__init__.py +++ b/atlite/pv/__init__.py @@ -5,8 +5,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from atlite.pv.irradiation import ( DiffuseHorizontalIrrad, TiltedDiffuseIrrad, @@ -24,9 +22,6 @@ from atlite.pv.solar_panel_model import SolarPanelModel from atlite.pv.solar_position import SolarPosition -if TYPE_CHECKING: - pass - __all__: list[str] = [ "DiffuseHorizontalIrrad", "TiltedDirectIrrad", diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index b89d844f..d9e13fb0 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -5,11 +5,13 @@ from __future__ import annotations import logging +from typing import TYPE_CHECKING import numpy as np from dask.array import cos, fmax, fmin, radians, sin, sqrt -from atlite._types import DataArray, Dataset +if TYPE_CHECKING: + from atlite._types import DataArray, Dataset logger = logging.getLogger(__name__) @@ -132,11 +134,11 @@ def TiltedDiffuseIrrad( + A * R_b ) * diffuse - if logger.isEnabledFor(logging.WARNING): - if ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any(): - logger.warning( - "diffuse_t exhibits negative values above altitude threshold." 
- ) + if ( + logger.isEnabledFor(logging.WARNING) + and ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any() + ): + logger.warning("diffuse_t exhibits negative values above altitude threshold.") with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) @@ -190,13 +192,12 @@ def _albedo(ds: Dataset, influx: DataArray) -> DataArray: """ if "albedo" in ds: return ds["albedo"] - elif "outflux" in ds: + if "outflux" in ds: return (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) # type: ignore[no-any-return] - else: - raise AssertionError( - "Need either albedo or outflux as a variable in the dataset. " - "Check your cutout and dataset module." - ) + raise AssertionError( + "Need either albedo or outflux as a variable in the dataset. " + "Check your cutout and dataset module." + ) def TiltedGroundIrrad( diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index 0a746e07..d2544105 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -5,15 +5,17 @@ from __future__ import annotations import sys -from collections.abc import Callable -from typing import Any +from typing import TYPE_CHECKING, Any import numpy as np import xarray as xr from dask.array import arccos, arcsin, arctan, cos, logical_and, radians, sin from numpy import pi -from atlite._types import Dataset, NumericArray +if TYPE_CHECKING: + from collections.abc import Callable + + from atlite._types import Dataset, NumericArray def get_orientation( @@ -101,10 +103,10 @@ def latitude_optimal( slope[~below_50] = np.radians(40.0) azimuth = np.where(lat.values < 0, 0, pi) # type: ignore[union-attr] - return dict( - slope=xr.DataArray(slope, coords=lat.coords), # type: ignore[union-attr] - azimuth=xr.DataArray(azimuth, coords=lat.coords), # type: ignore[union-attr] - ) + return { + "slope": xr.DataArray(slope, coords=lat.coords), # type: ignore[union-attr] + "azimuth": xr.DataArray(azimuth, coords=lat.coords), # type: ignore[union-attr] + } 
return latitude_optimal @@ -150,7 +152,7 @@ def constant( dict Mapping with constant ``slope`` and ``azimuth``. """ - return dict(slope=slope_rad, azimuth=azimuth_rad) + return {"slope": slope_rad, "azimuth": azimuth_rad} return constant @@ -193,7 +195,7 @@ def latitude( dict Mapping with latitude-based ``slope`` and constant ``azimuth``. """ - return dict(slope=lat, azimuth=azimuth_rad) + return {"slope": lat, "azimuth": azimuth_rad} return latitude diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index 7f8e82a9..8eb6dec0 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -4,12 +4,14 @@ from __future__ import annotations -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import numpy as np -import xarray as xr -from atlite._types import DataArray +if TYPE_CHECKING: + import xarray as xr + + from atlite._types import DataArray def _power_huld( @@ -44,9 +46,7 @@ def _power_huld( da = G_ * eff * pc.get("inverter_efficiency", 1.0) da.attrs["units"] = "kWh/kWp" - da = da.rename("specific generation") - - return da # type: ignore[no-any-return] + return da.rename("specific generation") # type: ignore[no-any-return] def _power_bofinger( @@ -105,7 +105,6 @@ def SolarPanelModel( if model == "huld": return _power_huld(irradiance, ds["temperature"], pc) - elif model == "bofinger": + if model == "bofinger": return _power_bofinger(irradiance, ds["temperature"], pc) - else: - raise AssertionError(f"Unknown panel model: {model}") + raise AssertionError(f"Unknown panel model: {model}") diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 8fc82c3f..07e87cb6 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -4,6 +4,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING from warnings import warn import pandas as pd @@ -11,7 +12,8 @@ from dask.array import arccos, arcsin, arctan2, cos, radians, sin from numpy import pi -from 
atlite._types import Dataset +if TYPE_CHECKING: + from atlite._types import Dataset def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset: @@ -71,6 +73,7 @@ def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset "include the solar position variables into your cutout." ), DeprecationWarning, + stacklevel=2, ) # up to h and dec from [1] @@ -123,6 +126,4 @@ def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset az.attrs["units"] = "rad" vars = {da.name: da for da in [alt, az]} - solar_position = xr.Dataset(vars) - - return solar_position # type: ignore[no-any-return] + return xr.Dataset(vars) diff --git a/atlite/resource.py b/atlite/resource.py index d135bb07..ca309e39 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -22,7 +22,6 @@ from dask.array import radians from scipy.signal import fftconvolve -from atlite._types import DataArray, NDArray, PathLike from atlite.utils import arrowdict logger = logging.getLogger(name=__name__) @@ -37,6 +36,8 @@ from typing_extensions import NotRequired + from atlite._types import DataArray, NDArray, PathLike + class TurbineConfig(TypedDict): V: NDArray POW: NDArray @@ -100,7 +101,9 @@ def get_windturbineconfig( ) if isinstance(turbine, str) and turbine.startswith("oedb:"): - conf = cast(dict[str, Any], get_oedb_windturbineconfig(turbine[len("oedb:") :])) + conf = cast( + "dict[str, Any]", get_oedb_windturbineconfig(turbine[len("oedb:") :]) + ) elif isinstance(turbine, (str, Path)): if isinstance(turbine, str): @@ -109,20 +112,20 @@ def get_windturbineconfig( elif isinstance(turbine, Path): turbine_path = turbine - with open(turbine_path) as f: + with Path(turbine_path).open() as f: conf = yaml.safe_load(f) - conf = dict( - V=np.array(conf["V"]), - POW=np.array(conf["POW"]), - hub_height=conf["HUB_HEIGHT"], - P=np.max(conf["POW"]), - ) + conf = { + "V": np.array(conf["V"]), + "POW": np.array(conf["POW"]), + "hub_height": conf["HUB_HEIGHT"], + 
"P": np.max(conf["POW"]), + } elif isinstance(turbine, dict): conf = turbine return _validate_turbine_config_dict( - cast(dict[str, Any], conf), add_cutout_windspeed + cast("dict[str, Any]", conf), add_cutout_windspeed ) @@ -152,10 +155,8 @@ def get_solarpanelconfig(panel: str | PathLike) -> PanelConfig: elif isinstance(panel, Path): panel_path = panel - with open(panel_path) as f: - conf = cast(PanelConfig, yaml.safe_load(f)) - - return conf + with Path(panel_path).open() as f: + return cast("PanelConfig", yaml.safe_load(f)) def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: @@ -185,8 +186,8 @@ def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: elif isinstance(installation, Path): installation_path = installation - with open(installation_path) as f: - config = cast(dict[str, Any], yaml.safe_load(f)) + with Path(installation_path).open() as f: + config = cast("dict[str, Any]", yaml.safe_load(f)) config["path"] = installation_path da = pd.DataFrame(config["efficiency"]).set_index(["altitude", "azimuth"]) @@ -208,7 +209,7 @@ def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: config["efficiency"] = da - return cast(CSPConfig, config) + return cast("CSPConfig", config) def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> float: @@ -230,12 +231,11 @@ def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> f model = panel.get("model", "huld") if model == "huld": - return cast(float, panel["efficiency"]) - elif model == "bofinger": + return cast("float", panel["efficiency"]) + if model == "bofinger": A, B, C = itemgetter("A", "B", "C")(panel) - return cast(float, (A + B * 1000.0 + C * np.log(1000.0)) * 1e3) - else: - raise ValueError(f"Unknown panel model: {model}") + return cast("float", (A + B * 1000.0 + C * np.log(1000.0)) * 1e3) + raise ValueError(f"Unknown panel model: {model}") def windturbine_rated_capacity_per_unit( @@ -290,7 +290,7 @@ def 
windturbine_smooth( if params is None or params is True: params = {} - params = cast(dict[str, float], params) + params = cast("dict[str, float]", params) eta: float = params.get("eta", 0.95) Delta_v: float = params.get("Delta_v", 1.27) sigma: float = params.get("sigma", 2.29) @@ -316,7 +316,7 @@ def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: turbine = turbine.copy() turbine["V"], turbine["POW"] = smooth(turbine["V"], turbine["POW"]) - turbine["P"] = cast(float, float(np.max(turbine["POW"]))) + turbine["P"] = cast("float", float(np.max(turbine["POW"]))) if any(turbine["POW"][np.where(turbine["V"] == 0.0)] > 1e-2): logger.warning( @@ -332,7 +332,7 @@ def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: def _max_v_is_zero_pow(turbine: TurbineConfig) -> bool: return cast( - bool, bool(np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0)) + "bool", bool(np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0)) ) @@ -385,23 +385,23 @@ def _validate_turbine_config_dict( raise ValueError(err_msg) if add_cutout_windspeed is True and not _max_v_is_zero_pow( - cast(TurbineConfig, turbine) + cast("TurbineConfig", turbine) ): turbine["V"] = np.pad(turbine["V"], (0, 1), "maximum") turbine["POW"] = np.pad(turbine["POW"], (0, 1), "constant", constant_values=0) logger.info( - "adding a cut-out wind speed to the turbine power curve at " - f"V={turbine['V'][-1]} m/s." + "adding a cut-out wind speed to the turbine power curve at V=%s m/s.", + turbine["V"][-1], ) - if not _max_v_is_zero_pow(cast(TurbineConfig, turbine)): + if not _max_v_is_zero_pow(cast("TurbineConfig", turbine)): logger.warning( "The power curve does not have a cut-out wind speed, i.e. the power" " output corresponding to the\nhighest wind speed is not zero. You can" " either change the power curve manually or set\n" "'add_cutout_windspeed=True' in the Cutout.wind conversion method." 
) - return cast(TurbineConfig, turbine) + return cast("TurbineConfig", turbine) def get_oedb_windturbineconfig( @@ -460,9 +460,8 @@ def get_oedb_windturbineconfig( _oedb_turbines = df[df.has_power_curve] logger.info( - "Searching turbine power curve in OEDB database using " - + ", ".join(f"{k}='{v}'" for (k, v) in search_params.items()) - + "." + "Searching turbine power curve in OEDB database using %s.", + ", ".join(f"{k}='{v}'" for (k, v) in search_params.items()), ) # Working copy @@ -487,7 +486,7 @@ def get_oedb_windturbineconfig( if len(df) < 1: raise RuntimeError("No turbine found.") - elif len(df) > 1: + if len(df) > 1: raise RuntimeError( f"Provided information corresponds to {len(df)} turbines," " use `id` for an unambiguous search.\n" diff --git a/atlite/utils.py b/atlite/utils.py index da31eadb..82c13f4c 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -10,7 +10,6 @@ import logging import re import textwrap -from collections.abc import Callable from pathlib import Path from typing import TYPE_CHECKING, Any, TypeAlias, cast @@ -21,6 +20,8 @@ from atlite.gis import maybe_swap_spatial_dims if TYPE_CHECKING: + from collections.abc import Callable + pass logger = logging.getLogger(__name__) @@ -73,7 +74,7 @@ def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> D ) raise - data = cast(Dataset, maybe_swap_spatial_dims(data)) # type: ignore[no-untyped-call] + data = cast("Dataset", maybe_swap_spatial_dims(data)) # type: ignore[no-untyped-call] module = data.attrs["module"] data.attrs["prepared_features"] = list(datamodules[module].features) for v in data: @@ -84,8 +85,10 @@ def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> D path = Path(path).with_suffix(".nc") logger.info( - f"Writing cutout data to {path}. When done, load it again using" - f"\n\n\tatlite.Cutout('{path}')" + "Writing cutout data to %s. 
When done, load it again using" + "\n\n\tatlite.Cutout('%s')", + path, + path, ) data.to_netcdf(path) return data @@ -106,7 +109,7 @@ def __getattr__(self, item: str) -> Any: try: return self.__getitem__(item) except KeyError as e: - raise AttributeError(e.args[0]) + raise AttributeError(e.args[0]) from e _re_pattern = re.compile("[a-zA-Z_][a-zA-Z0-9_]*") diff --git a/atlite/wind.py b/atlite/wind.py index 75304840..6b86f02f 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -9,11 +9,12 @@ import logging import re -from typing import Literal, cast +from typing import TYPE_CHECKING, Literal, cast import numpy as np -from atlite._types import DataArray, Dataset, NDArray +if TYPE_CHECKING: + from atlite._types import DataArray, Dataset, NDArray logger = logging.getLogger(__name__) @@ -92,7 +93,7 @@ def extrapolate_wind_speed( raise RuntimeError( "The logarithmic interpolation method requires surface roughness (roughness);\n" "make sure you choose a compatible dataset like ERA5" - ) + ) from None wnd_spd: DataArray = ds[from_name] * ( np.log(to_height / roughness) / np.log(from_height / roughness) ) @@ -104,7 +105,7 @@ def extrapolate_wind_speed( raise RuntimeError( "The power law interpolation method requires a wind shear exponent (wnd_shear_exp);\n" "make sure you choose a compatible dataset like ERA5 and update your cutout" - ) + ) from None wnd_spd = ds[from_name] * (to_height / from_height) ** wnd_shear_exp method_desc = "power method with wind shear exponent" else: @@ -122,4 +123,4 @@ def extrapolate_wind_speed( } ) - return cast(DataArray, wnd_spd.rename(to_name)) + return cast("DataArray", wnd_spd.rename(to_name)) diff --git a/doc/chart.py b/doc/chart.py index e03a0e0b..6812108a 100755 --- a/doc/chart.py +++ b/doc/chart.py @@ -37,16 +37,16 @@ processedstr = "\n" + "\n\n\n".join([" ◦ " + s for s in processeddata]) + "\n" # defaults for boxes and arrows -kwargs = dict(verticalalignment="center", fontsize=14, color="#545454") -arrowkwargs = dict( - head_width=0.2, 
- width=0.13, - head_length=0.05, - edgecolor="white", - length_includes_head=True, - color="lightgray", - alpha=1, -) +kwargs = {"verticalalignment": "center", "fontsize": 14, "color": "#545454"} +arrowkwargs = { + "head_width": 0.2, + "width": 0.13, + "head_length": 0.05, + "edgecolor": "white", + "length_includes_head": True, + "color": "lightgray", + "alpha": 1, +} y = 0.5 # First arrow @@ -61,7 +61,12 @@ y, climatestr, **kwargs, - bbox=dict(facecolor="indianred", alpha=0.5, edgecolor="None", boxstyle="round"), + bbox={ + "facecolor": "indianred", + "alpha": 0.5, + "edgecolor": "None", + "boxstyle": "round", + }, ) # Second arrow @@ -74,7 +79,12 @@ y, processedstr, **kwargs, - bbox=dict(facecolor="olivedrab", alpha=0.5, edgecolor="None", boxstyle="round"), + bbox={ + "facecolor": "olivedrab", + "alpha": 0.5, + "edgecolor": "None", + "boxstyle": "round", + }, ) diff --git a/examples/historic-comparison-germany.ipynb b/examples/historic-comparison-germany.ipynb index dda326ef..f60cc169 100644 --- a/examples/historic-comparison-germany.ipynb +++ b/examples/historic-comparison-germany.ipynb @@ -67,17 +67,17 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import zipfile\n", + "from pathlib import Path\n", "\n", "import requests\n", "\n", "\n", "def download_file(url, local_filename):\n", " # variant of http://stackoverflow.com/a/16696317\n", - " if not os.path.exists(local_filename):\n", + " if not Path(local_filename).exists():\n", " r = requests.get(url, stream=True)\n", - " with open(local_filename, \"wb\") as f:\n", + " with Path(local_filename).open(\"wb\") as f:\n", " for chunk in r.iter_content(chunk_size=1024):\n", " if chunk:\n", " f.write(chunk)\n", @@ -121,7 +121,7 @@ "opsd.index = opsd.index.tz_convert(None)\n", "\n", "# We are only interested in the 2012 data\n", - "opsd = opsd[(\"2011\" < opsd.index) & (opsd.index < \"2013\")]" + "opsd = opsd[(opsd.index > \"2011\") & (opsd.index < \"2013\")]" ] }, { @@ -456,7 +456,7 @@ "source": [ 
"compare = (\n", " pd.DataFrame(\n", - " dict(atlite=pv.squeeze().to_series(), opsd=opsd[\"DE_solar_generation_actual\"])\n", + " {\"atlite\": pv.squeeze().to_series(), \"opsd\": opsd[\"DE_solar_generation_actual\"]}\n", " )\n", " / 1e3\n", ") # in GW\n", @@ -493,9 +493,10 @@ "pv_opt = cutout.pv(panel=\"CSi\", orientation=\"latitude_optimal\", layout=solar_layout)\n", "compare_opt = (\n", " pd.DataFrame(\n", - " dict(\n", - " atlite=pv_opt.squeeze().to_series(), opsd=opsd[\"DE_solar_generation_actual\"]\n", - " )\n", + " {\n", + " \"atlite\": pv_opt.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " }\n", " )\n", " / 1e3\n", ") # in GW\n", @@ -685,14 +686,14 @@ "outputs": [], "source": [ "turbine_categories = [\n", - " dict(name=\"Vestas_V25_200kW\", up=400.0),\n", - " dict(name=\"Vestas_V47_660kW\", up=700.0),\n", - " dict(name=\"Bonus_B1000_1000kW\", up=1100.0),\n", - " dict(name=\"Suzlon_S82_1.5_MW\", up=1600.0),\n", - " dict(name=\"Vestas_V66_1750kW\", up=1900.0),\n", - " dict(name=\"Vestas_V80_2MW_gridstreamer\", up=2200.0),\n", - " dict(name=\"Siemens_SWT_2300kW\", up=2500.0),\n", - " dict(name=\"Vestas_V90_3MW\", up=50000.0),\n", + " {\"name\": \"Vestas_V25_200kW\", \"up\": 400.0},\n", + " {\"name\": \"Vestas_V47_660kW\", \"up\": 700.0},\n", + " {\"name\": \"Bonus_B1000_1000kW\", \"up\": 1100.0},\n", + " {\"name\": \"Suzlon_S82_1.5_MW\", \"up\": 1600.0},\n", + " {\"name\": \"Vestas_V66_1750kW\", \"up\": 1900.0},\n", + " {\"name\": \"Vestas_V80_2MW_gridstreamer\", \"up\": 2200.0},\n", + " {\"name\": \"Siemens_SWT_2300kW\", \"up\": 2500.0},\n", + " {\"name\": \"Vestas_V90_3MW\", \"up\": 50000.0},\n", "]" ] }, diff --git a/examples/plotting_with_atlite.ipynb b/examples/plotting_with_atlite.ipynb index dbbd3db3..f3031235 100644 --- a/examples/plotting_with_atlite.ipynb +++ b/examples/plotting_with_atlite.ipynb @@ -263,14 +263,14 @@ "gs = GridSpec(3, 3, figure=fig)\n", "\n", "ax = fig.add_subplot(gs[:, 0:2], 
projection=projection)\n", - "plot_grid_dict = dict(\n", - " alpha=0.1,\n", - " edgecolor=\"k\",\n", - " zorder=4,\n", - " aspect=\"equal\",\n", - " facecolor=\"None\",\n", - " transform=plate(),\n", - ")\n", + "plot_grid_dict = {\n", + " \"alpha\": 0.1,\n", + " \"edgecolor\": \"k\",\n", + " \"zorder\": 4,\n", + " \"aspect\": \"equal\",\n", + " \"facecolor\": \"None\",\n", + " \"transform\": plate(),\n", + "}\n", "UkIr.plot(ax=ax, zorder=1, transform=plate())\n", "cells.plot(ax=ax, **plot_grid_dict)\n", "country_bound.plot(ax=ax, edgecolor=\"orange\", facecolor=\"None\", transform=plate())\n", diff --git a/examples/solarpv_tracking_options.ipynb b/examples/solarpv_tracking_options.ipynb index e31d345e..436c39a7 100644 --- a/examples/solarpv_tracking_options.ipynb +++ b/examples/solarpv_tracking_options.ipynb @@ -408,7 +408,9 @@ "source": [ "day_profiles = [ds.loc[day, point].squeeze() for ds in data]\n", "\n", - "df = pd.DataFrame({k: v.to_series() for k, v in zip(labels, day_profiles)})\n", + "df = pd.DataFrame(\n", + " {k: v.to_series() for k, v in zip(labels, day_profiles, strict=False)}\n", + ")\n", "df.plot(figsize=(10, 5))\n", "plt.title(\"PV Tracking: Portugal @(-9°, 40°), May 1, 2019\")" ] @@ -452,7 +454,7 @@ ], "source": [ "average = [ds.mean(\"dim_0\").mean().item() for ds in data]\n", - "df = pd.Series({k: v for k, v in zip(labels, average)})\n", + "df = pd.Series(dict(zip(labels, average, strict=False)))\n", "df.mul(100).plot.barh(figsize=(10, 5), zorder=2)\n", "plt.grid(axis=\"x\", zorder=1)\n", "plt.title(\"PV Tracking: Average Capacity Factor per Cell [%]\")" diff --git a/examples/working-with-csp.ipynb b/examples/working-with-csp.ipynb index aead50e8..4795e0e1 100644 --- a/examples/working-with-csp.ipynb +++ b/examples/working-with-csp.ipynb @@ -27,6 +27,8 @@ "metadata": {}, "outputs": [], "source": [ + "from pathlib import Path\n", + "\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -521,7 +523,7 @@ 
"}\n", "\n", "layout = xr.zeros_like(cf)\n", - "layout.loc[dict(x=nearest_location[\"x\"], y=nearest_location[\"y\"])] = installed_power" + "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power" ] }, { @@ -764,7 +766,7 @@ "\n", "# installed power = 950 W/m^2 * area = 1205.0 MW\n", "installed_power = config[\"r_irradiance\"] * area / 1.0e6\n", - "layout.loc[dict(x=nearest_location[\"x\"], y=nearest_location[\"y\"])] = installed_power\n", + "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power\n", "\n", "# Calculate time-series for layout with both installation configurations\n", "time_series = xr.merge(\n", @@ -1286,7 +1288,7 @@ " \"efficiency\": df,\n", "}\n", "\n", - "with open(\"SAM_solar_tower.yaml\", \"w\") as f:\n", + "with Path(\"SAM_solar_tower.yaml\").open(\"w\") as f:\n", " yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)" ] } diff --git a/pyproject.toml b/pyproject.toml index f901e88b..99da4ec3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,15 +84,23 @@ extend-include = ['*.ipynb'] [tool.ruff.lint] select = [ - 'F', # pyflakes - 'E', # pycodestyle: Error - 'W', # pycodestyle: Warning - 'I', # isort - 'D', # pydocstyle - 'UP', # pyupgrade - 'TID', # flake8-tidy-imports - # 'NPY', # numpy - 'RUF013', # ruff + 'F', # pyflakes + 'E', # pycodestyle: Error + 'W', # pycodestyle: Warning + 'I', # isort + 'D', # pydocstyle + 'UP', # pyupgrade + 'TID', # flake8-tidy-imports + 'B', # flake8-bugbear + 'SIM', # flake8-simplify + 'RET', # flake8-return + 'C4', # flake8-comprehensions + 'TC', # flake8-type-checking + 'NPY', # numpy + 'G', # flake8-logging-format + 'PTH', # flake8-use-pathlib + 'RUF013', # ruff: implicit-optional + 'RUF100', # ruff: unused-noqa ] ignore = [ diff --git a/test/conftest.py b/test/conftest.py index 9dfb99a6..04b5533a 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -35,8 +35,7 @@ def cutouts_path(tmp_path_factory, 
pytestconfig): path = Path(custom_path) path.mkdir(parents=True, exist_ok=True) return path - else: - return tmp_path_factory.mktemp("atlite_cutouts") + return tmp_path_factory.mktemp("atlite_cutouts") def _prepare_era5_cutout(path, prepare_kwargs=None, **kwargs): @@ -110,18 +109,16 @@ def cutout_era5_weird_resolution(cutouts_path): @pytest.fixture(scope="session") def cutout_era5_reduced(cutouts_path): tmp_path = cutouts_path / "cutout_era5_reduced.nc" - cutout = Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) - return cutout + return Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) @pytest.fixture(scope="session") def cutout_era5_overwrite(cutouts_path, cutout_era5_reduced): tmp_path = cutouts_path / "cutout_era5_overwrite.nc" - cutout = Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) + return Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) # cutout.data = cutout.data.drop_vars("influx_direct") # cutout.prepare("influx", overwrite=True) # TODO Needs to be fixed - return cutout @pytest.fixture(scope="session") diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 8a3f4ee2..0e4f6221 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -26,12 +26,12 @@ def identity_convert(ds, **kwargs): @pytest.fixture def cutout(): - np.random.seed(42) + rng = np.random.default_rng(42) times = xr.date_range("2020-01-01", periods=24, freq="h") data = xr.Dataset( { "var": xr.DataArray( - np.random.rand(24, 3, 4), + rng.random((24, 3, 4)), dims=["time", "y", "x"], coords={ "time": times, diff --git a/test/test_creation.py b/test/test_creation.py index 52d95c7a..055b089a 100755 --- a/test/test_creation.py +++ b/test/test_creation.py @@ -50,12 +50,12 @@ def test_shape(ref): def test_extent(ref): reference_extent = [-4.125, 1.625, 55.875, 61.125] - assert all([x == y for x, y in zip(ref.extent, reference_extent)]) + assert all(x == y for x, y in zip(ref.extent, reference_extent, 
strict=False)) def test_bounds(ref): reference_extent = [-4.125, 55.875, 1.625, 61.125] - assert all([x == y for x, y in zip(ref.bounds, reference_extent)]) + assert all(x == y for x, y in zip(ref.bounds, reference_extent, strict=False)) def test_transform(ref): @@ -147,7 +147,7 @@ def test_dx_dy_dt(): ) assert dx == cutout.dx assert dy == cutout.dy - assert "h" == cutout.dt + assert cutout.dt == "h" def test_available_features(ref): diff --git a/test/test_gis.py b/test/test_gis.py index 23c28fe7..e05dc813 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -72,7 +72,8 @@ def raster(tmp_path_factory): bounds = (X0, Y0, X1, Y1) # same as in test_gis.py res = 0.01 transform, shape = padded_transform_and_shape(bounds, res) - mask = np.random.rand(*shape) < raster_clip + rng = np.random.default_rng(42) + mask = rng.random(shape) < raster_clip mask = mask.astype(rio.int32) path = tmp_path / "raster.tif" with rio.open( @@ -96,7 +97,8 @@ def raster_reproject(tmp_path_factory): bounds = rio.warp.transform_bounds(4326, 3035, X0, Y0, X1, Y1) res = 1000 transform, shape = padded_transform_and_shape(bounds, res) - mask = np.random.rand(*shape) < raster_clip + rng = np.random.default_rng(42) + mask = rng.random(shape) < raster_clip mask = mask.astype(rio.int32) path = tmp_path / "raster_reproject.tif" with rio.open( @@ -120,7 +122,8 @@ def raster_codes(tmp_path_factory): bounds = (X0, Y0, X1, Y1) # same as in test_gis.py res = 0.01 transform, shape = padded_transform_and_shape(bounds, res) - mask = (np.random.rand(*shape) * 100).astype(int) + rng = np.random.default_rng(42) + mask = (rng.random(shape) * 100).astype(int) mask = mask.astype(rio.int32) path = tmp_path / "raster_codes.tif" with rio.open( diff --git a/test/test_preparation_and_conversion.py b/test/test_preparation_and_conversion.py index 1ed9bdf1..f95230b2 100644 --- a/test/test_preparation_and_conversion.py +++ b/test/test_preparation_and_conversion.py @@ -12,6 +12,7 @@ import os import sys from datetime import 
date +from pathlib import Path import geopandas as gpd import numpy as np @@ -365,12 +366,12 @@ def hydro_test(cutout): cutout.grid.loc[[0], ["x", "y"]].values, columns=["lon", "lat"] ) basins = gpd.GeoDataFrame( - dict( - geometry=[cutout.grid.geometry[0]], - HYBAS_ID=[0], - DIST_MAIN=10, - NEXT_DOWN=None, - ), + { + "geometry": [cutout.grid.geometry[0]], + "HYBAS_ID": [0], + "DIST_MAIN": 10, + "NEXT_DOWN": None, + }, index=[0], crs=cutout.crs, ) @@ -405,7 +406,7 @@ def test_data_module_arguments_era5(cutout_era5): All data variables should have an attribute to which module they belong. """ - for v in cutout_era5.data: + for _v in cutout_era5.data: assert cutout_era5.data.attrs["module"] == "era5" @staticmethod @@ -588,7 +589,7 @@ def test_line_rating_era5(cutout_era5): @pytest.mark.skipif( - not os.path.exists(SARAH_DIR), reason="'sarah_dir' is not a valid path" + not Path(SARAH_DIR).exists(), reason="'sarah_dir' is not a valid path" ) class TestSarah: @staticmethod @@ -642,7 +643,7 @@ def test_runoff_sarah(cutout_sarah): @pytest.mark.skipif( - not os.path.exists(GEBCO_PATH), reason="'gebco_path' is not a valid path" + not Path(GEBCO_PATH).exists(), reason="'gebco_path' is not a valid path" ) class TestGebco: @staticmethod From 820e7e2d87014abea406815f6692937080728b05 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 10:47:54 +0100 Subject: [PATCH 05/27] Include test directory in mypy checking and fix all test type errors --- .pre-commit-config.yaml | 2 +- atlite/gis.py | 8 ++++++-- pyproject.toml | 2 +- test/conftest.py | 2 +- test/test_aggregate_time.py | 4 ++-- test/test_gis.py | 16 ++++++++-------- test/test_preparation_and_conversion.py | 2 +- 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b44e22ba..c9852c5b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: hooks: - id: mypy additional_dependencies: ['types-PyYAML', 'types-requests'] 
- exclude: ^(tests?/|docs/|examples/) + exclude: ^(docs/|examples/) # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell diff --git a/atlite/gis.py b/atlite/gis.py index a2cdb06c..ab7c5ad8 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -538,8 +538,12 @@ def __init__(self, crs: CrsLike = 3035, res: float = 100) -> None: def add_raster( self, raster: PathLike | rio.DatasetReader, - codes: int | list[int] | Callable[[NDArray], NDArray] | None = None, - buffer: int = 0, + codes: int + | list[int] + | Sequence[int] + | Callable[[NDArray], NDArray] + | None = None, + buffer: float = 0, invert: bool = False, nodata: int = 255, allow_no_overlap: bool = False, diff --git a/pyproject.toml b/pyproject.toml index 99da4ec3..20f17c5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,4 +136,4 @@ warn_unused_configs = true ignore_missing_imports = true disallow_incomplete_defs = true check_untyped_defs = true -exclude = ["test/", ".venv/", "build/"] +exclude = [".venv/", "build/"] diff --git a/test/conftest.py b/test/conftest.py index 04b5533a..412c9eea 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -7,7 +7,7 @@ from pathlib import Path import pytest -from dateutil.relativedelta import relativedelta +from dateutil.relativedelta import relativedelta # type: ignore[import-untyped] from atlite import Cutout diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 0e4f6221..4a0c6b88 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -174,8 +174,8 @@ def test_capacity_factor_with_aggregate_time_raises(self, cutout): class TestInvalidArgs: def test_invalid_aggregate_time_value(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") + convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") # type: ignore[arg-type] def 
test_aggregate_time_true_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=True) + convert_and_aggregate(cutout, identity_convert, aggregate_time=True) # type: ignore[arg-type] diff --git a/test/test_gis.py b/test/test_gis.py index e05dc813..7aa2e9b0 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -264,27 +264,27 @@ def test_regrid(): fine = np.block([[ones * A, ones * B], [ones * C, ones * D]]) # add coordinates finecoords = np.arange(0.5, 8, 1) - fine = xr.DataArray(fine, coords=[("y", finecoords), ("x", finecoords)]) + fine = xr.DataArray(fine, coords=[("y", finecoords), ("x", finecoords)]) # type: ignore[assignment] coarsecoords = np.arange(2, 8, 4) coarse = xr.DataArray(np.nan, coords=[("y", coarsecoords), ("x", coarsecoords)]) # apply average resampling - res = regrid(fine, coarse.x, coarse.y, resampling=5) + res = regrid(fine, coarse.x, coarse.y, resampling=5) # type: ignore[arg-type] target = np.array([[A, B], [C, D]]) assert allclose(res, target) assert (coarse.x == res.x).all() and (coarse.y == res.y).all() # now test multiple layers - fine = xr.concat([fine] * 10, pd.Index(range(10), name="z")) - res = regrid(fine, coarse.x, coarse.y, resampling=5) + fine = xr.concat([fine] * 10, pd.Index(range(10), name="z")) # type: ignore[assignment, list-item] + res = regrid(fine, coarse.x, coarse.y, resampling=5) # type: ignore[arg-type] target = np.stack([np.array([[A, B], [C, D]])] * 10) assert allclose(res, target) assert (coarse.x == res.x).all() and (coarse.y == res.y).all() # now let the target grid cover a subarea of the original - fine = fine.sel(z=0, drop=True) + fine = fine.sel(z=0, drop=True) # type: ignore[attr-defined] coarsecoords = np.arange(1, 6, 2) coarse = xr.DataArray(np.nan, coords=[("y", coarsecoords), ("x", coarsecoords)]) @@ -589,7 +589,7 @@ def test_availability_matrix_rastered(ref, raster): ).rename_axis("shape") I = 
np.asarray(ref.indicatormatrix(shapes).todense()) I = I.reshape(shapes.shape + ref.shape) - I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) + I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) # type: ignore[assignment] excluder = ExclusionContainer(ref.crs, res=0.01) excluder.add_raster(raster) ds = ref.availabilitymatrix(shapes, excluder) @@ -619,7 +619,7 @@ def test_availability_matrix_rastered_repro(ref, raster_reproject): ).rename_axis("shape") I = np.asarray(ref.indicatormatrix(shapes).todense()) I = I.reshape(shapes.shape + ref.shape) - I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) + I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) # type: ignore[assignment] excluder = ExclusionContainer() excluder.add_raster(raster_reproject) ds = ref.availabilitymatrix(shapes, excluder) @@ -650,7 +650,7 @@ def test_shape_availability_exclude_raster_codes(ref, raster_codes): # test with a function excluder = ExclusionContainer(ref.crs, res=res) - excluder.add_raster(raster_codes, codes=lambda x: x < 20, invert=True) + excluder.add_raster(raster_codes, codes=lambda x: x < 20, invert=True) # type: ignore[arg-type, return-value] masked, transform = shape_availability(shapes, excluder) assert ratio == masked.sum() / masked.size diff --git a/test/test_preparation_and_conversion.py b/test/test_preparation_and_conversion.py index f95230b2..56411eb2 100644 --- a/test/test_preparation_and_conversion.py +++ b/test/test_preparation_and_conversion.py @@ -19,7 +19,7 @@ import pandas as pd import pytest import urllib3 -from dateutil.relativedelta import relativedelta +from dateutil.relativedelta import relativedelta # type: ignore[import-untyped] from shapely.geometry import LineString as Line from shapely.geometry import Point From b1edf3f689a9f968ed56df65663f64218855c196 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 11:05:31 +0100 Subject: [PATCH 06/27] 
Remove examples/ from mypy exclude (no .py files to check, notebooks covered by ruff) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c9852c5b..6eebfc55 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: hooks: - id: mypy additional_dependencies: ['types-PyYAML', 'types-requests'] - exclude: ^(docs/|examples/) + exclude: ^docs/ # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell From c5a7a8d3f3c2795590d6bf340bbc13a52c579932 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 11:07:43 +0100 Subject: [PATCH 07/27] Include doc/ in mypy checking and fix type errors in conf.py and chart.py --- .pre-commit-config.yaml | 1 - doc/chart.py | 8 +++++++- doc/conf.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6eebfc55..67b39839 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,6 @@ repos: hooks: - id: mypy additional_dependencies: ['types-PyYAML', 'types-requests'] - exclude: ^docs/ # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell diff --git a/doc/chart.py b/doc/chart.py index 6812108a..4d08befe 100755 --- a/doc/chart.py +++ b/doc/chart.py @@ -7,6 +7,8 @@ This is a temporary script file. 
""" +from typing import Any + import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(12, 5)) @@ -37,7 +39,11 @@ processedstr = "\n" + "\n\n\n".join([" ◦ " + s for s in processeddata]) + "\n" # defaults for boxes and arrows -kwargs = {"verticalalignment": "center", "fontsize": 14, "color": "#545454"} +kwargs: dict[str, Any] = { + "verticalalignment": "center", + "fontsize": 14, + "color": "#545454", +} arrowkwargs = { "head_width": 0.2, "width": 0.13, diff --git a/doc/conf.py b/doc/conf.py index 64119206..3b3a61a9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -253,7 +253,7 @@ # -- Options for LaTeX output --------------------------------------------- -latex_elements = { +latex_elements: dict[str, str] = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). From 8153e27617aa342efab5622883182223c120cdbc Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 11:32:19 +0100 Subject: [PATCH 08/27] Bump mypy python_version to 3.11, enabling proper xarray Self types and removing unnecessary type: ignore comments --- atlite/convert.py | 18 +++++++++--------- atlite/cutout.py | 4 ++-- atlite/data.py | 10 +++++----- atlite/datasets/cordex.py | 4 ++-- atlite/datasets/era5.py | 16 ++++++++-------- atlite/datasets/gebco.py | 2 +- atlite/datasets/ncep.py | 12 ++++++------ atlite/datasets/sarah.py | 8 ++++---- atlite/pv/irradiation.py | 4 ++-- atlite/pv/orientation.py | 4 ++-- atlite/pv/solar_position.py | 2 +- atlite/resource.py | 4 ++-- atlite/utils.py | 2 +- pyproject.toml | 2 +- test/test_gis.py | 4 ++-- 15 files changed, 48 insertions(+), 48 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index 107fa58a..b903d90e 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -320,7 +320,7 @@ def convert_temperature(ds: Dataset) -> DataArray: Ambient temperature in degrees Celsius. 
""" # Temperature is in Kelvin - return ds["temperature"] - 273.15 # type: ignore[no-any-return] + return ds["temperature"] - 273.15 def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: @@ -347,7 +347,7 @@ def convert_soil_temperature(ds: Dataset) -> DataArray: # There are nans where there is sea; by setting them # to zero we guarantee they do not contribute when multiplied # by matrix in atlite/aggregate.py - return (ds["soil temperature"] - 273.15).fillna(0.0) # type: ignore[no-any-return] + return (ds["soil temperature"] - 273.15).fillna(0.0) def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: @@ -370,7 +370,7 @@ def convert_dewpoint_temperature(ds: Dataset) -> DataArray: Dew point temperature in degrees Celsius. """ # Temperature is in Kelvin - return ds["dewpoint temperature"] - 273.15 # type: ignore[no-any-return] + return ds["dewpoint temperature"] - 273.15 def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: @@ -430,7 +430,7 @@ def convert_coefficient_of_performance( delta_T = sink_T - source_T - return c0 + c1 * delta_T + c2 * delta_T**2 # type: ignore[no-any-return] + return c0 + c1 * delta_T + c2 * delta_T**2 # type: ignore[operator] def coefficient_of_performance( @@ -519,7 +519,7 @@ def convert_heat_demand( heat_demand = heat_demand.clip(min=0.0) - return (constant + heat_demand).rename("heat_demand") # type: ignore[no-any-return] + return (constant + heat_demand).rename("heat_demand") def heat_demand( @@ -623,7 +623,7 @@ def convert_cooling_demand( cooling_demand = cooling_demand.clip(min=0.0) - return (constant + cooling_demand).rename("cooling_demand") # type: ignore[no-any-return] + return (constant + cooling_demand).rename("cooling_demand") def cooling_demand( @@ -741,7 +741,7 @@ def convert_solar_thermal( output = irradiation * eta - return output.where(output > 0.0, 0.0) # type: ignore[no-any-return] + return output.where(output > 0.0, 0.0) def solar_thermal( @@ 
-925,7 +925,7 @@ def wind( if smooth: turbine_config = windturbine_smooth(turbine_config, params=smooth) - return cutout.convert_and_aggregate( # type: ignore[no-any-return, return-value] + return cutout.convert_and_aggregate( # type: ignore[no-any-return] convert_func=convert_wind, turbine=turbine_config, interpolation_method=interpolation_method, @@ -1520,7 +1520,7 @@ def convert_line_rating( if isinstance(ds, dict): Position = namedtuple("Position", ["altitude", "azimuth"]) - solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) # type: ignore[assignment] + solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) else: solar_position = SolarPosition(ds) # type: ignore[assignment] Phi_s = arccos( diff --git a/atlite/cutout.py b/atlite/cutout.py index 6f25deb2..1b0208fe 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -432,7 +432,7 @@ def sel( if bounds is not None: if buffer > 0: - bounds = box(*bounds).buffer(buffer).bounds # type: ignore[assignment] + bounds = box(*bounds).buffer(buffer).bounds x1, y1, x2, y2 = bounds # type: ignore[misc] kwargs.update(x=slice(x1, x2), y=slice(y1, y2)) data = self.data.sel(**kwargs) @@ -620,7 +620,7 @@ def uniform_density_layout( capacity placed within one grid cell. 
""" - return capacity_density * self.area(crs) # type: ignore[no-any-return] + return capacity_density * self.area(crs) def equals(self, other: Any) -> bool: """ diff --git a/atlite/data.py b/atlite/data.py index 3f218bfe..4a209d5c 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -16,11 +16,11 @@ from typing import TYPE_CHECKING, Any import numpy as np -import pandas as pd # type: ignore[import-untyped] +import pandas as pd import xarray as xr -from dask import compute as dask_compute # type: ignore[attr-defined] +from dask import compute as dask_compute from dask import delayed -from dask.diagnostics import ProgressBar # type: ignore[attr-defined] +from dask.diagnostics import ProgressBar from dask.utils import SerializableLock from numpy import atleast_1d @@ -87,7 +87,7 @@ def get_features( ) datasets.append(feature_data) - datasets = dask_compute(*datasets) # type: ignore[no-untyped-call] + datasets = dask_compute(*datasets) ds: Dataset = xr.merge(datasets, compat="equals") for v in ds: @@ -324,7 +324,7 @@ def cutout_prepare( logger.debug("Writing cutout to file...") write_job: Any = ds.to_netcdf(tmp, compute=False) if show_progress: - with ProgressBar(minimum=2): # type: ignore[no-untyped-call] + with ProgressBar(minimum=2): write_job.compute(**dask_kwargs) else: write_job.compute(**dask_kwargs) diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 50b92720..5e6ab60a 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -41,7 +41,7 @@ def rename_and_clean_coords(ds: xr.Dataset) -> xr.Dataset: ds = ds.rename({"rlon": "x", "rlat": "y"}) - return ds.drop( # type: ignore[no-any-return] + return ds.drop( (set(ds.coords) | set(ds.data_vars)) & {"bnds", "height", "rotated_pole"} ) @@ -136,7 +136,7 @@ def prepare_meta_cordex( meta["height"] = ds["height"] - return meta # type: ignore[no-any-return] + return meta def tasks_yearly_cordex( diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index ccbfc0fd..f2b61efc 
100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -44,7 +44,7 @@ import contextlib @contextlib.contextmanager # type: ignore[no-redef] - def nullcontext(): # type: ignore[misc] + def nullcontext(): yield @@ -77,7 +77,7 @@ def _add_height(ds: xr.Dataset) -> xr.Dataset: if "time" in z.coords: z = z.isel(time=0, drop=True) ds["height"] = z / g0 - return ds.drop_vars("z") # type: ignore[no-any-return] + return ds.drop_vars("z") def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dataset: @@ -88,7 +88,7 @@ def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dat ds = maybe_swap_spatial_dims(ds) # type: ignore[assignment] if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - return ds.drop_vars(["expver", "number"], errors="ignore") # type: ignore[no-any-return] + return ds.drop_vars(["expver", "number"], errors="ignore") def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: @@ -116,7 +116,7 @@ def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"]) - return ds.rename({"fsr": "roughness"}) # type: ignore[no-any-return] + return ds.rename({"fsr": "roughness"}) def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: @@ -158,7 +158,7 @@ def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: sp = SolarPosition(ds, time_shift=time_shift) sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - return xr.merge([ds, sp]) # type: ignore[no-any-return] + return xr.merge([ds, sp]) def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: @@ -178,7 +178,7 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ) ds = _rename_and_clean_coords(ds) - return ds.rename( # type: ignore[no-any-return] + return ds.rename( { "t2m": "temperature", "stl4": "soil temperature", @@ -191,7 +191,7 @@ def 
get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) - return ds.rename({"ro": "runoff"}) # type: ignore[no-any-return] + return ds.rename({"ro": "runoff"}) def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: @@ -417,7 +417,7 @@ def retrieve_once(time: dict[str, Any]) -> xr.Dataset: if feature in static_features: static_times = retrieval_times(coords, static=True) assert isinstance(static_times, dict) - return retrieve_once(static_times).squeeze() # type: ignore[no-any-return] + return retrieve_once(static_times).squeeze() time_chunks = retrieval_times(coords, monthly_requests=monthly_requests) assert isinstance(time_chunks, list) diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 25f4eb66..9b32dc02 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -65,7 +65,7 @@ def get_data( path = creation_parameters["gebco_path"] coords = cutout.coords - return ( # type: ignore[no-any-return] + return ( get_data_gebco_height(coords["x"], coords["y"], path) .to_dataset() .assign_coords(cutout.coords) diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 914da2c2..dc0c8a1e 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -59,7 +59,7 @@ def convert_lons_lats_ncep( ds = ds.sel(lon_0=xs) ds = ds.rename({"lon_0": "x", "lat_0": "y"}) - return ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) # type: ignore[no-any-return] + return ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr.Dataset: @@ -81,7 +81,7 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: y = da * xr.DataArray( np.arange(1, len(coords) + 1), dims=[dim], coords={dim: coords} ) - return y - y.shift(**{dim: 1}).fillna(0.0) # type: ignore[no-any-return, arg-type] + return y - y.shift(**{dim: 1}).fillna(0.0) # type: 
ignore[arg-type] for k, da in ds.items(): assert isinstance(k, str) @@ -89,12 +89,12 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: ds[k[: -len("_avg")]] = unaverage(da) ds = ds.drop(k) - return ds # type: ignore[return-value] + return ds def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: - return da - da.shift(**{dim: 1}).fillna(0.0) # type: ignore[no-any-return, arg-type] + return da - da.shift(**{dim: 1}).fillna(0.0) # type: ignore[arg-type] for k, da in ds.items(): assert isinstance(k, str) @@ -102,7 +102,7 @@ def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: ds[k[: -len("_acc")]] = unaccumulate(da) ds = ds.drop(k) - return ds # type: ignore[return-value] + return ds def convert_clip_lower( @@ -268,7 +268,7 @@ def prepare_meta_ncep( meta["height"] = ds["height"] - return meta # type: ignore[no-any-return] + return meta def tasks_monthly_ncep( diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index 4d1ba1ab..f36d075f 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -162,13 +162,13 @@ def get_data( lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) + ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) # type: ignore[assignment] if cutout.dt not in ["30min", "30T"]: - ds = hourly_mean(ds) # type: ignore[arg-type] + ds = hourly_mean(ds) if (cutout.dx != dx) or (cutout.dy != dy): - ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) + ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) # type: ignore[assignment] dif_attrs = {"long_name": "Surface Diffuse Shortwave Flux", "units": "W m-2"} ds["influx_diffuse"] = (ds["SIS"] - ds["SID"]).assign_attrs(**dif_attrs) @@ -182,4 +182,4 @@ def get_data( 
sp = SolarPosition(ds, time_shift="0H") sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - return xr.merge([ds, sp]) # type: ignore[no-any-return] + return xr.merge([ds, sp]) diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index d9e13fb0..376bdfba 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -193,7 +193,7 @@ def _albedo(ds: Dataset, influx: DataArray) -> DataArray: if "albedo" in ds: return ds["albedo"] if "outflux" in ds: - return (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) # type: ignore[no-any-return] + return (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) raise AssertionError( "Need either albedo or outflux as a variable in the dataset. " "Check your cutout and dataset module." @@ -282,7 +282,7 @@ def TiltedIrradiation( influx_toa = ds["influx_toa"] def clip(influx: DataArray, influx_max: DataArray) -> DataArray: - return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) # type: ignore[no-any-return] + return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) if "influx" in ds: influx = clip(ds["influx"], influx_toa) diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index d2544105..3e95d293 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -260,10 +260,10 @@ def SurfaceOrientation( surface_slope = arccos(cos(rotation) * cos(axis_tilt)) azimuth_difference = sun_azimuth - surface_azimuth - azimuth_difference = np.where( + azimuth_difference = np.where( # type: ignore[assignment] azimuth_difference > pi, azimuth_difference - 2 * pi, azimuth_difference ) - azimuth_difference = np.where( + azimuth_difference = np.where( # type: ignore[assignment] azimuth_difference < -pi, 2 * pi + azimuth_difference, azimuth_difference ) rotation = np.where( diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 07e87cb6..7efcdde6 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ 
-63,7 +63,7 @@ def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset } if rvs.issubset(set(ds.data_vars)): - return ds[rvs].rename({v: v.replace("solar_", "") for v in rvs}) # type: ignore[no-any-return] + return ds[rvs].rename({v: v.replace("solar_", "") for v in rvs}) warn( ( diff --git a/atlite/resource.py b/atlite/resource.py index ca309e39..7bfcdeb4 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -17,8 +17,8 @@ import numpy as np import pandas as pd -import requests -import yaml +import requests # type: ignore[import-untyped] +import yaml # type: ignore[import-untyped] from dask.array import radians from scipy.signal import fftconvolve diff --git a/atlite/utils.py b/atlite/utils.py index 82c13f4c..f2df1904 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -74,7 +74,7 @@ def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> D ) raise - data = cast("Dataset", maybe_swap_spatial_dims(data)) # type: ignore[no-untyped-call] + data = cast("Dataset", maybe_swap_spatial_dims(data)) module = data.attrs["module"] data.attrs["prepared_features"] = list(datamodules[module].features) for v in data: diff --git a/pyproject.toml b/pyproject.toml index 20f17c5a..f3c0c509 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,7 +130,7 @@ ignore = [ ] [tool.mypy] -python_version = "3.10" +python_version = "3.11" warn_return_any = true warn_unused_configs = true ignore_missing_imports = true diff --git a/test/test_gis.py b/test/test_gis.py index 7aa2e9b0..ff971d73 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -596,7 +596,7 @@ def test_availability_matrix_rastered(ref, raster): eligible_share = 1 - raster_clip assert isclose(I.sum() * eligible_share, ds.sum(), atol=5) - assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) + assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) # type: ignore[list-item] excluder = ExclusionContainer(ref.crs, res=0.01) 
excluder.add_raster(raster) @@ -626,7 +626,7 @@ def test_availability_matrix_rastered_repro(ref, raster_reproject): eligible_share = 1 - raster_clip assert isclose(I.sum() * eligible_share, ds.sum(), atol=5) - assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) + assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) # type: ignore[list-item] def test_shape_availability_exclude_raster_codes(ref, raster_codes): From 16cd01cd6759b05861490a7e7ae4669f264add20 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 11:52:23 +0100 Subject: [PATCH 09/27] Fix mypy errors: remove unused type: ignore comments, fix type annotations, remove stray code --- atlite/convert.py | 26 +++++++++++++------------- atlite/cutout.py | 9 +++++---- atlite/datasets/cordex.py | 4 ++-- atlite/datasets/era5.py | 10 +++++----- atlite/datasets/ncep.py | 4 ++-- atlite/datasets/sarah.py | 6 +++--- atlite/pv/irradiation.py | 10 +++++----- atlite/pv/orientation.py | 28 ++++++++++++++++++---------- atlite/pv/solar_panel_model.py | 4 ++-- atlite/pv/solar_position.py | 2 +- atlite/resource.py | 6 +++--- pyproject.toml | 1 + test/test_gis.py | 20 ++++++++++---------- 13 files changed, 70 insertions(+), 60 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index b903d90e..f0c3e73e 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -217,7 +217,7 @@ def convert_and_aggregate( ) if isinstance(matrix, xr.DataArray): - coords = matrix.indexes.get(matrix.dims[1]).to_frame(index=False) # type: ignore[union-attr] + coords = matrix.indexes.get(matrix.dims[1]).to_frame(index=False) if not np.array_equal(coords[["x", "y"]], cutout.grid[["x", "y"]]): raise ValueError( "Matrix spatial coordinates not aligned with cutout spatial " @@ -324,7 +324,7 @@ def convert_temperature(ds: Dataset) -> DataArray: def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: - return 
cutout.convert_and_aggregate(convert_func=convert_temperature, **params) # type: ignore[no-any-return] + return cutout.convert_and_aggregate(convert_func=convert_temperature, **params) # soil temperature @@ -351,7 +351,7 @@ def convert_soil_temperature(ds: Dataset) -> DataArray: def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: - return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) # type: ignore[no-any-return] + return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) # dewpoint temperature @@ -374,7 +374,7 @@ def convert_dewpoint_temperature(ds: Dataset) -> DataArray: def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: - return cutout.convert_and_aggregate( # type: ignore[no-any-return] + return cutout.convert_and_aggregate( convert_func=convert_dewpoint_temperature, **params ) @@ -430,7 +430,7 @@ def convert_coefficient_of_performance( delta_T = sink_T - source_T - return c0 + c1 * delta_T + c2 * delta_T**2 # type: ignore[operator] + return c0 + c1 * delta_T + c2 * delta_T**2 def coefficient_of_performance( @@ -467,7 +467,7 @@ def coefficient_of_performance( Energy & Environmental Science (2012), 5, 9291-9306, https://doi.org/10.1039/C2EE22653G. """ - return cutout.convert_and_aggregate( # type: ignore[no-any-return] + return cutout.convert_and_aggregate( convert_func=convert_coefficient_of_performance, source=source, sink_T=sink_T, @@ -572,7 +572,7 @@ def heat_demand( documented in the `convert_and_aggregate` function. """ - return cutout.convert_and_aggregate( # type: ignore[no-any-return] + return cutout.convert_and_aggregate( convert_func=convert_heat_demand, threshold=threshold, a=a, @@ -679,7 +679,7 @@ def cooling_demand( documented in the `convert_and_aggregate` function. 
""" - return cutout.convert_and_aggregate( # type: ignore[no-any-return] + return cutout.convert_and_aggregate( convert_func=convert_cooling_demand, threshold=threshold, a=a, @@ -792,7 +792,7 @@ def solar_thermal( if not callable(orientation): orientation = get_orientation(orientation) # type: ignore[assignment] - return cutout.convert_and_aggregate( # type: ignore[no-any-return] + return cutout.convert_and_aggregate( convert_func=convert_solar_thermal, orientation=orientation, trigon_model=trigon_model, @@ -846,7 +846,7 @@ def apply_power_curve(da): ) da.attrs["units"] = "MWh/MWp" - return da.rename("specific generation") # type: ignore[no-any-return] + return da.rename("specific generation") def wind( @@ -925,7 +925,7 @@ def wind( if smooth: turbine_config = windturbine_smooth(turbine_config, params=smooth) - return cutout.convert_and_aggregate( # type: ignore[no-any-return] + return cutout.convert_and_aggregate( convert_func=convert_wind, turbine=turbine_config, interpolation_method=interpolation_method, @@ -1522,7 +1522,7 @@ def convert_line_rating( Position = namedtuple("Position", ["altitude", "azimuth"]) solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) else: - solar_position = SolarPosition(ds) # type: ignore[assignment] + solar_position = SolarPosition(ds) Phi_s = arccos( cos(solar_position.altitude) * cos((solar_position.azimuth) - radians(psi)) ) @@ -1660,4 +1660,4 @@ def get_azimuth(shape): else: res = compute(res, **dask_kwargs) - return xr.concat(*res, dim=df.index).assign_attrs(units="A") # type: ignore[call-overload] + return xr.concat(*res, dim=df.index).assign_attrs(units="A") diff --git a/atlite/cutout.py b/atlite/cutout.py index 1b0208fe..d0aba70e 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -221,7 +221,7 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: data = xr.Dataset(coords=coords, attrs=attrs) # Check compatibility of CRS - modules = atleast_1d(data.attrs.get("module")) # type: 
ignore[arg-type] + modules = atleast_1d(data.attrs.get("module")) crs = {CRS(datamodules[m].crs) for m in modules} assert len(crs) == 1, f"CRS of {module} not compatible" @@ -240,7 +240,7 @@ def module(self) -> str | list[str]: """ Data module of the cutout. """ - return self.data.attrs.get("module") # type: ignore[no-any-return, return-value] + return self.data.attrs.get("module") # type: ignore[no-any-return] @property def crs(self) -> CRS: @@ -433,7 +433,8 @@ def sel( if bounds is not None: if buffer > 0: bounds = box(*bounds).buffer(buffer).bounds - x1, y1, x2, y2 = bounds # type: ignore[misc] + assert bounds is not None + x1, y1, x2, y2 = bounds kwargs.update(x=slice(x1, x2), y=slice(y1, y2)) data = self.data.sel(**kwargs) return Cutout(path, data=data) @@ -628,7 +629,7 @@ def equals(self, other: Any) -> bool: """ if not isinstance(other, Cutout): return NotImplemented # type: ignore[no-any-return] - return self.data.equals(other.data) # type: ignore[no-any-return] + return bool(self.data.equals(other.data)) def layout_from_capacity_list(self, data, col="Capacity"): """ diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 5e6ab60a..7489f147 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -152,10 +152,10 @@ def tasks_yearly_cordex( model = meta_attrs["model"] if not isinstance(xs, slice): - first, second, last = xs.values[[0, 1, -1]] # type: ignore[attr-defined] + first, second, last = xs.values[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) if not isinstance(ys, slice): - first, second, last = ys.values[[0, 1, -1]] # type: ignore[attr-defined] + first, second, last = ys.values[[0, 1, -1]] ys = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) return [ diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index f2b61efc..36287ca6 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -85,7 +85,7 @@ def _rename_and_clean_coords(ds: 
xr.Dataset, add_lon_lat: bool = True) -> xr.Dat ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) ) - ds = maybe_swap_spatial_dims(ds) # type: ignore[assignment] + ds = maybe_swap_spatial_dims(ds) if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) return ds.drop_vars(["expver", "number"], errors="ignore") @@ -259,7 +259,7 @@ def noisy_unlink(path: PathLike) -> None: def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: logger.debug("Adding finalizer for %s", target) - weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) # type: ignore[union-attr] + weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: @@ -352,10 +352,10 @@ def retrieve_data( result = client.retrieve(product, request) if lock is None: - lock = nullcontext() # type: ignore[assignment] + lock = nullcontext() suffix = f".{request['data_format']}" - with lock: # type: ignore[union-attr] + with lock: fd, target = mkstemp(suffix=suffix, dir=tmpdir) os.close(fd) @@ -427,4 +427,4 @@ def retrieve_once(time: dict[str, Any]) -> xr.Dataset: else: datasets = map(retrieve_once, time_chunks) - return xr.concat(datasets, dim="time").sel(time=coords["time"]) # type: ignore[no-any-return] + return xr.concat(datasets, dim="time").sel(time=coords["time"]) diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index dc0c8a1e..2fdf52b2 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -81,7 +81,7 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: y = da * xr.DataArray( np.arange(1, len(coords) + 1), dims=[dim], coords={dim: coords} ) - return y - y.shift(**{dim: 1}).fillna(0.0) # type: ignore[arg-type] + return y - y.shift(**{dim: 1}).fillna(0.0) for k, da in ds.items(): assert isinstance(k, str) @@ -94,7 +94,7 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: def 
convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: - return da - da.shift(**{dim: 1}).fillna(0.0) # type: ignore[arg-type] + return da - da.shift(**{dim: 1}).fillna(0.0) for k, da in ds.items(): assert isinstance(k, str) diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index f36d075f..d548eb5c 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -105,7 +105,7 @@ def _interpolate(a: Any) -> Any: dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "interpolate only supports datasets with homogeneous dtype" - return xr.apply_ufunc( # type: ignore[no-any-return] + return xr.apply_ufunc( _interpolate, ds, input_core_dims=[[dim]], @@ -162,13 +162,13 @@ def get_data( lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) # type: ignore[assignment] + ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) if cutout.dt not in ["30min", "30T"]: ds = hourly_mean(ds) if (cutout.dx != dx) or (cutout.dy != dy): - ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) # type: ignore[assignment] + ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) dif_attrs = {"long_name": "Surface Diffuse Shortwave Flux", "units": "W m-2"} ds["influx_diffuse"] = (ds["SIS"] - ds["SID"]).assign_attrs(**dif_attrs) diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 376bdfba..65f9cf22 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -82,7 +82,7 @@ def DiffuseHorizontalIrrad( else: raise KeyError("`clearsky model` must be chosen from 'simple' and 'enhanced'") - return (influx * fraction).rename("diffuse horizontal") # type: ignore[no-any-return] + return (influx * fraction).rename("diffuse horizontal") def TiltedDiffuseIrrad( @@ -143,7 +143,7 @@ def 
TiltedDiffuseIrrad( with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) - return diffuse_t.rename("diffuse tilted") # type: ignore[no-any-return] + return diffuse_t.rename("diffuse tilted") def TiltedDirectIrrad( @@ -171,7 +171,7 @@ def TiltedDirectIrrad( R_b = cosincidence / sinaltitude - return (R_b * direct).rename("direct tilted") # type: ignore[no-any-return] + return (R_b * direct).rename("direct tilted") def _albedo(ds: Dataset, influx: DataArray) -> DataArray: @@ -227,7 +227,7 @@ def TiltedGroundIrrad( """ surface_slope = surface_orientation["slope"] ground_t = influx * _albedo(ds, influx) * (1.0 - cos(surface_slope)) / 2.0 - return ground_t.rename("ground tilted") # type: ignore[no-any-return] + return ground_t.rename("ground tilted") def TiltedIrradiation( @@ -337,4 +337,4 @@ def clip(influx: DataArray, influx_max: DataArray) -> DataArray: result = result.where(~(cap_alt | (direct + diffuse <= 0.01)), 0) result.attrs["units"] = "W m**-2" - return result # type: ignore[no-any-return] + return result diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index 3e95d293..a7d7b4ed 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -91,21 +91,21 @@ def latitude_optimal( dict Mapping with ``slope`` and ``azimuth``. 
""" - slope = np.empty_like(lat.values) # type: ignore[union-attr] + slope = np.empty_like(lat.values) - below_25 = np.abs(lat.values) <= np.radians(25) # type: ignore[union-attr] - below_50 = np.abs(lat.values) <= np.radians(50) # type: ignore[union-attr] + below_25 = np.abs(lat.values) <= np.radians(25) + below_50 = np.abs(lat.values) <= np.radians(50) - slope[below_25] = 0.87 * np.abs(lat.values[below_25]) # type: ignore[union-attr] + slope[below_25] = 0.87 * np.abs(lat.values[below_25]) slope[~below_25 & below_50] = 0.76 * np.abs( - lat.values[~below_25 & below_50] # type: ignore[union-attr] + lat.values[~below_25 & below_50] ) + np.radians(0.31) slope[~below_50] = np.radians(40.0) - azimuth = np.where(lat.values < 0, 0, pi) # type: ignore[union-attr] + azimuth = np.where(lat.values < 0, 0, pi) return { - "slope": xr.DataArray(slope, coords=lat.coords), # type: ignore[union-attr] - "azimuth": xr.DataArray(azimuth, coords=lat.coords), # type: ignore[union-attr] + "slope": xr.DataArray(slope, coords=lat.coords), + "azimuth": xr.DataArray(azimuth, coords=lat.coords), } return latitude_optimal @@ -260,10 +260,10 @@ def SurfaceOrientation( surface_slope = arccos(cos(rotation) * cos(axis_tilt)) azimuth_difference = sun_azimuth - surface_azimuth - azimuth_difference = np.where( # type: ignore[assignment] + azimuth_difference = np.where( azimuth_difference > pi, azimuth_difference - 2 * pi, azimuth_difference ) - azimuth_difference = np.where( # type: ignore[assignment] + azimuth_difference = np.where( azimuth_difference < -pi, 2 * pi + azimuth_difference, azimuth_difference ) rotation = np.where( @@ -306,3 +306,11 @@ def SurfaceOrientation( "azimuth": surface_azimuth, } ) + + return xr.Dataset( + { + "cosincidence": cosincidence, + "slope": surface_slope, + "azimuth": surface_azimuth, + } + ) diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index 8eb6dec0..4659302d 100644 --- a/atlite/pv/solar_panel_model.py +++ 
b/atlite/pv/solar_panel_model.py @@ -46,7 +46,7 @@ def _power_huld( da = G_ * eff * pc.get("inverter_efficiency", 1.0) da.attrs["units"] = "kWh/kWp" - return da.rename("specific generation") # type: ignore[no-any-return] + return da.rename("specific generation") def _power_bofinger( @@ -78,7 +78,7 @@ def _power_bofinger( capacity = (pc["A"] + pc["B"] * 1000.0 + pc["C"] * np.log(1000.0)) * 1e3 power = irradiance * eta * (pc.get("inverter_efficiency", 1.0) / capacity) power = power.where(irradiance >= pc["threshold"], 0) - return power.rename("AC power") # type: ignore[no-any-return] + return power.rename("AC power") def SolarPanelModel( diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 7efcdde6..97794e88 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -88,7 +88,7 @@ def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset # Operations make new DataArray eager; reconvert to lazy dask arrays chunks = ds.chunksizes.get("time", "auto") if isinstance(chunks, tuple): - chunks = chunks[0] # type: ignore[assignment] + chunks = chunks[0] n = n.chunk(chunks) hour = hour.chunk(chunks) minute = minute.chunk(chunks) diff --git a/atlite/resource.py b/atlite/resource.py index 7bfcdeb4..cf5e49d1 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -17,8 +17,8 @@ import numpy as np import pandas as pd -import requests # type: ignore[import-untyped] -import yaml # type: ignore[import-untyped] +import requests +import yaml from dask.array import radians from scipy.signal import fftconvolve @@ -296,7 +296,7 @@ def windturbine_smooth( sigma: float = params.get("sigma", 2.29) def kernel(v_0: NDArray) -> NDArray: - return ( # type: ignore[no-any-return] + return ( 1.0 / np.sqrt(2 * np.pi * sigma * sigma) * np.exp(-(v_0 - Delta_v) * (v_0 - Delta_v) / (2 * sigma * sigma)) diff --git a/pyproject.toml b/pyproject.toml index f3c0c509..f97a62b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -134,6 
+134,7 @@ python_version = "3.11" warn_return_any = true warn_unused_configs = true ignore_missing_imports = true +warn_unused_ignores = true disallow_incomplete_defs = true check_untyped_defs = true exclude = [".venv/", "build/"] diff --git a/test/test_gis.py b/test/test_gis.py index ff971d73..e05dc813 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -264,27 +264,27 @@ def test_regrid(): fine = np.block([[ones * A, ones * B], [ones * C, ones * D]]) # add coordinates finecoords = np.arange(0.5, 8, 1) - fine = xr.DataArray(fine, coords=[("y", finecoords), ("x", finecoords)]) # type: ignore[assignment] + fine = xr.DataArray(fine, coords=[("y", finecoords), ("x", finecoords)]) coarsecoords = np.arange(2, 8, 4) coarse = xr.DataArray(np.nan, coords=[("y", coarsecoords), ("x", coarsecoords)]) # apply average resampling - res = regrid(fine, coarse.x, coarse.y, resampling=5) # type: ignore[arg-type] + res = regrid(fine, coarse.x, coarse.y, resampling=5) target = np.array([[A, B], [C, D]]) assert allclose(res, target) assert (coarse.x == res.x).all() and (coarse.y == res.y).all() # now test multiple layers - fine = xr.concat([fine] * 10, pd.Index(range(10), name="z")) # type: ignore[assignment, list-item] - res = regrid(fine, coarse.x, coarse.y, resampling=5) # type: ignore[arg-type] + fine = xr.concat([fine] * 10, pd.Index(range(10), name="z")) + res = regrid(fine, coarse.x, coarse.y, resampling=5) target = np.stack([np.array([[A, B], [C, D]])] * 10) assert allclose(res, target) assert (coarse.x == res.x).all() and (coarse.y == res.y).all() # now let the target grid cover a subarea of the original - fine = fine.sel(z=0, drop=True) # type: ignore[attr-defined] + fine = fine.sel(z=0, drop=True) coarsecoords = np.arange(1, 6, 2) coarse = xr.DataArray(np.nan, coords=[("y", coarsecoords), ("x", coarsecoords)]) @@ -589,14 +589,14 @@ def test_availability_matrix_rastered(ref, raster): ).rename_axis("shape") I = np.asarray(ref.indicatormatrix(shapes).todense()) I = 
I.reshape(shapes.shape + ref.shape) - I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) # type: ignore[assignment] + I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) excluder = ExclusionContainer(ref.crs, res=0.01) excluder.add_raster(raster) ds = ref.availabilitymatrix(shapes, excluder) eligible_share = 1 - raster_clip assert isclose(I.sum() * eligible_share, ds.sum(), atol=5) - assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) # type: ignore[list-item] + assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) excluder = ExclusionContainer(ref.crs, res=0.01) excluder.add_raster(raster) @@ -619,14 +619,14 @@ def test_availability_matrix_rastered_repro(ref, raster_reproject): ).rename_axis("shape") I = np.asarray(ref.indicatormatrix(shapes).todense()) I = I.reshape(shapes.shape + ref.shape) - I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) # type: ignore[assignment] + I = xr.DataArray(I, coords=[shapes.index, ref.coords["y"], ref.coords["x"]]) excluder = ExclusionContainer() excluder.add_raster(raster_reproject) ds = ref.availabilitymatrix(shapes, excluder) eligible_share = 1 - raster_clip assert isclose(I.sum() * eligible_share, ds.sum(), atol=5) - assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) # type: ignore[list-item] + assert_allclose(I.sum(["x", "y"]) * eligible_share, ds.sum(["x", "y"]), atol=5) def test_shape_availability_exclude_raster_codes(ref, raster_codes): @@ -650,7 +650,7 @@ def test_shape_availability_exclude_raster_codes(ref, raster_codes): # test with a function excluder = ExclusionContainer(ref.crs, res=res) - excluder.add_raster(raster_codes, codes=lambda x: x < 20, invert=True) # type: ignore[arg-type, return-value] + excluder.add_raster(raster_codes, codes=lambda x: x < 20, invert=True) masked, transform = shape_availability(shapes, excluder) assert ratio == masked.sum() / 
masked.size From 6591a145eb7817e97b96f34436f227e69a1734e3 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 16 Mar 2026 13:13:12 +0100 Subject: [PATCH 10/27] Enable ruff DOC/D417 rules for docstring-signature alignment and fix all 61 violations --- atlite/convert.py | 127 ++++++++++++++++----- atlite/cutout.py | 41 +++++-- atlite/data.py | 10 +- atlite/gis.py | 54 ++++++--- atlite/pv/irradiation.py | 18 +++ atlite/pv/orientation.py | 39 +++---- atlite/pv/solar_panel_model.py | 15 +++ atlite/pv/solar_position.py | 15 +-- atlite/resource.py | 35 ++++-- atlite/utils.py | 10 ++ atlite/wind.py | 28 ++--- doc/conf.py | 14 +-- examples/historic-comparison-germany.ipynb | 77 +++++++------ pyproject.toml | 7 +- test/test_preparation_and_conversion.py | 16 ++- 15 files changed, 349 insertions(+), 157 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index f0c3e73e..80017345 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -77,6 +77,10 @@ def convert_and_aggregate( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. + convert_func : callable + Callback like convert_wind, convert_pv. matrix : N x S - xr.DataArray or sp.sparse.csr_matrix or None If given, it is used to aggregate the grid cells to buses. N is the number of buses, S the number of spatial coordinates, in the @@ -112,11 +116,8 @@ def convert_and_aggregate( Whether to show a progress bar. dask_kwargs : dict, default {} Dict with keyword arguments passed to ``dask.compute``. - - Other Parameters - ---------------- - convert_func : Function - Callback like convert_wind, convert_pv + **convert_kwds : Any + Additional keyword arguments passed to ``convert_func``. Returns ------- @@ -145,6 +146,11 @@ def convert_and_aggregate( The installed units per bus in MW corresponding to ``layout`` (only if ``return_capacity`` is True). + Raises + ------ + ValueError + If deprecated parameters conflict or invalid arguments are provided. 
+ See Also -------- wind : Generate wind generation time-series. @@ -448,8 +454,10 @@ def coefficient_of_performance( difference from source to sink. The defaults for either source (c0, c1, c2) are based on a quadratic regression in [1]. - Paramterers - ----------- + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. source : str The heat source. Can be 'air' or 'soil'. sink_T : float @@ -460,6 +468,13 @@ def coefficient_of_performance( The linear regression coefficient for the temperature difference. c2 : float The quadratic regression coefficient for the temperature difference. + **params + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Coefficient of performance time-series. Reference --------- @@ -555,6 +570,8 @@ def heat_demand( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. threshold : float Outside temperature in degrees Celsius above which there is no heat demand. @@ -565,6 +582,13 @@ def heat_demand( temperature (e.g. due to water heating). hour_shift : float Time shift relative to UTC for taking daily average + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Heat demand time-series. Note ---- @@ -659,6 +683,8 @@ def cooling_demand( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. threshold : float Outside temperature in degrees Celsius below which there is no cooling demand. The default 23C is taken as a more liberal @@ -672,6 +698,13 @@ def cooling_demand( temperature (e.g. due to ventilation). hour_shift : float Time shift relative to UTC for taking daily average + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Cooling demand time-series. Note ---- @@ -762,7 +795,8 @@ def solar_thermal( Parameters ---------- - cutout : cutout + cutout : atlite.Cutout + The cutout to process. 
orientation : dict or str or function Panel orientation with slope and azimuth (units of degrees), or 'latitude_optimal'. @@ -775,6 +809,13 @@ def solar_thermal( Parameters for model in [1] (defaults to 0.8 and 3., respectively) t_store : float Store temperature in degree Celsius + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Solar thermal generation time-series. Note ---- @@ -865,6 +906,8 @@ def wind( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. turbine : str or dict A turbineconfig dictionary with the keys 'hub_height' for the hub height and 'V', 'POW' defining the power curve. @@ -885,6 +928,8 @@ def wind( interpolation_method : {"logarithmic", "power"} Law to interpolate wind speed to turbine hub height. Refer to :py:func:`atlite.wind.extrapolate_wind_speed`. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- @@ -897,6 +942,11 @@ def wind( You can also specify all of the general conversion arguments documented in the :py:func:`convert_and_aggregate` function. + References + ---------- + .. [1] Andresen G B, Søndergaard A A and Greiner M 2015 Energy 93, Part 1 + 1074 – 1088. doi:10.1016/j.energy.2015.09.071 + Examples -------- Aggregate wind generation to bus regions: @@ -912,11 +962,6 @@ def wind( ('time', 'y', 'x') >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") - References - ---------- - .. [1] Andresen G B, Søndergaard A A and Greiner M 2015 Energy 93, Part 1 - 1074 – 1088. doi:10.1016/j.energy.2015.09.071 - """ turbine_config = get_windturbineconfig( turbine, add_cutout_windspeed=add_cutout_windspeed @@ -992,6 +1037,8 @@ def irradiation( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. orientation : str, dict or callback Panel orientation can be chosen from either 'latitude_optimal', a constant orientation {'slope': 0.0, @@ -1016,6 +1063,8 @@ def irradiation( model. 
The default choice of None will choose dependending on data availability, since the 'enhanced' model also incorporates ambient air temperature and relative humidity. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- @@ -1094,6 +1143,8 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) Parameters ---------- + cutout : atlite.Cutout + The cutout to process. panel : str or dict Panel config dictionary with the parameters for the electrical model in [3]. Alternatively, name of yaml file stored in @@ -1115,6 +1166,8 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) model. The default choice of None will choose dependending on data availability, since the 'enhanced' model also incorporates ambient air temperature and relative humidity. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- @@ -1127,19 +1180,6 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) You can also specify all of the general conversion arguments documented in the :py:func:`convert_and_aggregate` function. - Examples - -------- - Aggregate PV generation to bus regions: - - >>> pv = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... matrix=matrix, index=buses, per_unit=True) - - Get per-cell capacity factor time series (no aggregation): - - >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... aggregate_time=False) - >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") - References ---------- [1] Soteris A. Kalogirou. Solar Energy Engineering: Processes and Systems, @@ -1153,6 +1193,19 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) the Performance Check of Grid Connected Systems, Freiburg, June 2004. Eurosun (ISES Europe Solar Congress). 
+ Examples + -------- + Aggregate PV generation to bus regions: + + >>> pv = cutout.pv(panel="CSi", orientation="latitude_optimal", + ... matrix=matrix, index=buses, per_unit=True) + + Get per-cell capacity factor time series (no aggregation): + + >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", + ... aggregate_time=False) + >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") + """ if isinstance(panel, (str | Path)): panel = get_solarpanelconfig(panel) @@ -1185,6 +1238,11 @@ def convert_csp(ds, installation): ------- xr.DataArray CSP output as specific yield per unit of reference capacity. + + Raises + ------ + ValueError + If the CSP technology option is not recognized. """ solar_position = SolarPosition(ds) @@ -1224,6 +1282,8 @@ def csp(cutout, installation, technology=None, **params): Parameters ---------- + cutout : atlite.Cutout + The cutout to process. installation: str or xr.DataArray CSP installation details determining the solar field efficiency dependent on the local solar position. Can be either the name of one of the standard @@ -1234,6 +1294,8 @@ def csp(cutout, installation, technology=None, **params): Overwrite CSP technology from the installation configuration. The technology affects which direct radiation is considered. Either 'parabolic trough' (DHI) or 'solar tower' (DNI). + **params + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- @@ -1362,7 +1424,8 @@ def runoff( normalize_using_yearly_i = normalize_using_yearly_i.astype(int) years = ( - pd.Series(pd.to_datetime(result.coords["time"].values).year) + pd + .Series(pd.to_datetime(result.coords["time"].values).year) .value_counts() .loc[lambda x: x > 8700] .index.intersection(normalize_using_yearly_i) @@ -1394,6 +1457,8 @@ def hydro( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. plants : pd.DataFrame Run-of-river plants or dams with lon, lat columns. 
hydrobasins : str|gpd.GeoDataFrame @@ -1406,6 +1471,13 @@ def hydro( better for coarser resolution). show_progress : bool Whether to display progressbars. + **kwargs + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Inflow time-series for each plant. References ---------- @@ -1555,6 +1627,7 @@ def line_rating( Parameters ---------- cutout : atlite.Cutout + The cutout to process. shapes : geopandas.GeoSeries Line shapes of the lines. line_resistance : float/series diff --git a/atlite/cutout.py b/atlite/cutout.py index d0aba70e..aaddbfb2 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -137,6 +137,8 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: data : xr.Dataset User provided cutout data. Save the cutout using `Cutout.to_file()` afterwards. + **cutoutparams + Additional keyword arguments. See Other Parameters below. Other Parameters ---------------- @@ -150,13 +152,20 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: Whether to interpolate NaN's in the SARAH data. This takes effect for sarah data which has missing data for areas where dawn and nightfall happens (ca. 30 min gap). - gebco_path: str + gebco_path : str Path to find the gebco NetCDF file. Only necessary when including the gebco module. parallel : bool, default False Whether to open dataset in parallel mode. Take effect for all xr.open_mfdataset usages. + Raises + ------ + TypeError + If required arguments are missing when building a new cutout. + ValueError + If ``bounds`` has an invalid format. 
+ """ path = Path(path).with_suffix(".nc") chunks = cutoutparams.pop("chunks", {"time": 100}) @@ -289,9 +298,12 @@ def extent(self) -> NDArray: """ xs, ys = self.coords["x"].values, self.coords["y"].values dx, dy = self.dx, self.dy - return np.array( - [xs[0] - dx / 2, xs[-1] + dx / 2, ys[0] - dy / 2, ys[-1] + dy / 2] - ) + return np.array([ + xs[0] - dx / 2, + xs[-1] + dx / 2, + ys[0] - dy / 2, + ys[-1] + dy / 2, + ]) @property def bounds(self) -> NDArray: @@ -536,6 +548,9 @@ def indicatormatrix( Parameters ---------- shapes : Collection of shapely polygons + Shapes to compute the indicator matrix for. + shapes_crs : int or CRS, default 4326 + CRS of the shapes. Returns ------- @@ -558,8 +573,10 @@ def intersectionmatrix( Parameters ---------- - orig : Collection of shapely polygons - dest : Collection of shapely polygons + shapes : Collection of shapely polygons + Shapes to compute the intersection matrix for. + shapes_crs : int or CRS, default 4326 + CRS of the shapes. Returns ------- @@ -597,6 +614,11 @@ def area(self, crs: CrsLike = None) -> DataArray: def uniform_layout(self) -> DataArray: """ Get a uniform capacity layout for all grid cells. + + Returns + ------- + xr.DataArray + Layout with value 1 for all grid cells. """ return xr.DataArray(1, [self.coords["y"], self.coords["x"]]) @@ -625,7 +647,12 @@ def uniform_density_layout( def equals(self, other: Any) -> bool: """ - It overrides xarray.Dataset.equals and ignores the path attribute in the comparison + It overrides xarray.Dataset.equals and ignores the path attribute in the comparison. + + Returns + ------- + bool + Whether the two cutouts are equal. 
""" if not isinstance(other, Cutout): return NotImplemented # type: ignore[no-any-return] diff --git a/atlite/data.py b/atlite/data.py index 4a209d5c..bae7f325 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -123,7 +123,8 @@ def available_features(module: str | Sequence[str] | None = None) -> pd.Series[s """ features: dict[str, Any] = {name: m.features for name, m in datamodules.items()} features_frame: pd.Series[Any] = ( - pd.DataFrame(features) + pd + .DataFrame(features) .unstack() .dropna() .rename_axis(index=["module", "feature"]) @@ -211,6 +212,7 @@ def cutout_prepare( Parameters ---------- cutout : atlite.Cutout + The cutout to process. features : str/list, optional Feature(s) to be prepared. The default slice(None) results in all available features. @@ -250,8 +252,10 @@ def cutout_prepare( Raises ------ - NotADirectoryError - The argument `tmpdir` is not a valid path. + ValueError + If ``tmpdir`` is None. + FileNotFoundError + If ``tmpdir`` does not exist. """ if dask_kwargs is None: diff --git a/atlite/gis.py b/atlite/gis.py index ab7c5ad8..a4b0f69b 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -89,13 +89,11 @@ def get_coords( x = slice(*sorted([x.start, x.stop])) y = slice(*sorted([y.start, y.stop])) - ds = xr.Dataset( - { - "x": np.round(np.arange(-180, 180, dx), 9), - "y": np.round(np.arange(-90, 90, dy), 9), - "time": pd.date_range(start="1940", end="now", freq=dt), - } - ) + ds = xr.Dataset({ + "x": np.round(np.arange(-180, 180, dx), 9), + "y": np.round(np.arange(-90, 90, dy), 9), + "time": pd.date_range(start="1940", end="now", freq=dt), + }) ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.sel(x=x, y=y, time=time) return cast("Dataset", ds) @@ -175,7 +173,13 @@ def compute_indicatormatrix( Parameters ---------- orig : Collection of shapely polygons + Origin polygons. dest : Collection of shapely polygons + Destination polygons. + orig_crs : int or CRS, default 4326 + CRS of the origin polygons. 
+ dest_crs : int or CRS, default 4326 + CRS of the destination polygons. Returns ------- @@ -226,7 +230,13 @@ def compute_intersectionmatrix( Parameters ---------- orig : Collection of shapely polygons + Origin polygons. dest : Collection of shapely polygons + Destination polygons. + orig_crs : int or CRS, default 4326 + CRS of the origin polygons. + dest_crs : int or CRS, default 4326 + CRS of the destination polygons. Returns ------- @@ -566,6 +576,8 @@ def add_raster( Buffer around the excluded areas in units of ExclusionContainer.crs. Use this to create a buffer around the excluded/included area. The default is 0. + nodata : int, optional + Value to use for nodata pixels. The default is 255. invert : bool, optional Whether to exclude (False) or include (True) the specified areas of the raster. The default is False. @@ -615,6 +627,11 @@ def add_geometry( def open_files(self) -> None: """ Open rasters and load geometries. + + Raises + ------ + ValueError + If a raster has an invalid CRS and none is provided. """ for d in self.rasters: raster = d["raster"] @@ -705,6 +722,11 @@ def compute_shape_availability( transform : rasterion.Affine Affine transform of the mask. + Raises + ------ + ValueError + If only some of ``dst_transform``, ``dst_crs``, ``dst_shape`` are given. + """ if isinstance(geometry, gpd.GeoDataFrame): geometry = geometry.geometry @@ -756,8 +778,9 @@ def plot_shape_availability( Geometry of which the eligible area is computed. If the series contains more than one geometry, the eligble area of the combined geometries is computed. - ax : matplotlib Axis, optional - set_title: boolean, optional + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure is created. + set_title : boolean, optional Whether to set the title with additional information on the share of eligible land. 
dst_transform : rasterio.Affine @@ -1040,7 +1063,8 @@ def _reproject(src: NDArray, **kwargs: Any) -> NDArray: return cast( "Dataset | DataArray", ( - xr.apply_ufunc( + xr + .apply_ufunc( _reproject, ds, input_core_dims=[[namey, namex]], @@ -1053,12 +1077,10 @@ def _reproject(src: NDArray, **kwargs: Any) -> NDArray: kwargs=kwargs, ) .rename({"yout": namey, "xout": namex}) - .assign_coords( - **{ - namey: (namey, dimy.data, ds.coords[namey].attrs), - namex: (namex, dimx.data, ds.coords[namex].attrs), - } - ) + .assign_coords(**{ + namey: (namey, dimy.data, ds.coords[namey].attrs), + namex: (namex, dimx.data, ds.coords[namex].attrs), + }) .assign_attrs(**ds.attrs) ), ) diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 65f9cf22..e28a5d87 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -39,6 +39,11 @@ def DiffuseHorizontalIrrad( ------- xarray.DataArray Diffuse horizontal irradiation. + + Raises + ------ + KeyError + If ``clearsky_model`` is not ``'simple'`` or ``'enhanced'``. """ sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -189,6 +194,11 @@ def _albedo(ds: Dataset, influx: DataArray) -> DataArray: ------- xarray.DataArray Surface albedo. + + Raises + ------ + AssertionError + If the dataset lacks both ``albedo`` and ``outflux`` variables. """ if "albedo" in ds: return ds["albedo"] @@ -262,6 +272,8 @@ def TiltedIrradiation( incorporates ambient air temperature and relative humidity. NOTE: this option is only used if the used climate dataset doesn't provide direct and diffuse irradiation separately! + tracking : int or str, default 0 + Type of solar tracking. 0 for fixed, other values for tracking modes. altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] will be set to zero. Default value equals 1.0 degrees. @@ -278,6 +290,12 @@ def TiltedIrradiation( result : xarray.DataArray The desired irradiation quantity on the tilted surface. 
+ Raises + ------ + AssertionError + If the dataset lacks required irradiation variables. + ValueError + If ``irradiation`` is not a recognized type. """ influx_toa = ds["influx_toa"] diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index a7d7b4ed..18b42a39 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -64,11 +64,10 @@ def make_latitude_optimal() -> Callable[ [2] http://dx.doi.org/10.1016/j.solener.2010.12.014 [3] https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py - Parameters - ---------- - lat : float - Latitude in degrees. - + Returns + ------- + callable + Orientation function returning latitude-optimal ``slope`` and ``azimuth``. """ def latitude_optimal( @@ -211,6 +210,16 @@ def SurfaceOrientation( """ Compute cos(incidence) for slope and panel azimuth. + Returns + ------- + xarray.Dataset + Dataset with ``cosincidence``, ``slope``, and ``azimuth``. + + Raises + ------ + AssertionError + If ``tracking`` is not a recognized tracking type. + References ---------- [1] Sproul, A. B., Derivation of the solar geometric relationships using @@ -299,18 +308,8 @@ def SurfaceOrientation( cosincidence = cosincidence.clip(min=0) - return xr.Dataset( - { - "cosincidence": cosincidence, - "slope": surface_slope, - "azimuth": surface_azimuth, - } - ) - - return xr.Dataset( - { - "cosincidence": cosincidence, - "slope": surface_slope, - "azimuth": surface_azimuth, - } - ) + return xr.Dataset({ + "cosincidence": cosincidence, + "slope": surface_slope, + "azimuth": surface_azimuth, + }) diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index 4659302d..c16166f5 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -25,6 +25,11 @@ def _power_huld( [1] Huld, T. et al., 2010. Mapping the performance of PV modules, effects of module type and data averaging. Solar Energy, 84(2), p.324-338. 
DOI: 10.1016/j.solener.2009.12.002 + + Returns + ------- + xr.DataArray + Specific generation in kWh/kWp. """ # normalized module temperature T_ = (pc["c_temp_amb"] * t_amb + pc["c_temp_irrad"] * irradiance) - pc["r_tmod"] @@ -61,6 +66,11 @@ def _power_bofinger( [2] Hans Beyer, Gerd Heilscher and Stefan Bofinger, 2004. A robust model for the MPP performance of different types of PV-modules applied for the performance check of grid connected systems. + + Returns + ------- + xr.DataArray + Specific generation in kWh/kWp. """ fraction = (pc["NOCT"] - pc["Tamb"]) / pc["Intc"] @@ -100,6 +110,11 @@ def SolarPanelModel( ------- xarray.DataArray Specific PV power output. + + Raises + ------ + AssertionError + If the panel model is unknown. """ model: Literal["huld", "bofinger"] = pc.get("model", "huld") diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 97794e88..c83002c7 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -33,16 +33,18 @@ def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset instantenous data (e.g. SARAH). Must be parseable by pandas.to_timedelta(). Default: "0H" + Returns + ------- + xarray.Dataset + Dataset with ``altitude`` and ``azimuth`` in radians. + References ---------- - [1] Michalsky, J. J., The astronomical almanac’s algorithm for approximate + [1] Michalsky, J. J., The astronomical almanac's algorithm for approximate solar position (1950–2050), Solar Energy, 40(3), 227–235 (1988). [2] Sproul, A. B., Derivation of the solar geometric relationships using vector analysis, Renewable Energy, 32(7), 1187–1205 (2007). [3] Kalogirou, Solar Energy Engineering (2009). - - More accurate algorithms would be - --------------------------------- [4] I. Reda and A. Andreas, Solar position algorithm for solar radiation applications. Solar Energy, vol. 76, no. 5, pp. 577-589, 2004. [5] I. Reda and A. 
Andreas, Corrigendum to Solar position algorithm for @@ -50,11 +52,6 @@ def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset [6] Blanc, P., & Wald, L., The SG2 algorithm for a fast and accurate computation of the position of the sun for multi-decadal time period, Solar Energy, 86(10), 3072–3083 (2012). - - The unfortunately quite computationally intensive SPA algorithm [4,5] has - been implemented using numba or plain numpy for a single location at - https://github.com/pvlib/pvlib-python/blob/master/pvlib/spa.py. - """ # Act like a getter if these return variables are already in ds rvs = { diff --git a/atlite/resource.py b/atlite/resource.py index cf5e49d1..cfb835b7 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -94,6 +94,11 @@ def get_windturbineconfig( config : dict Config with details on the turbine + Raises + ------ + KeyError + If ``turbine`` is not a str, Path, or dict. + """ if not isinstance(turbine, (str, Path, dict)): raise KeyError( @@ -195,12 +200,10 @@ def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: da = da.to_xarray()["value"] da = da.rename({"azimuth": "azimuth [deg]", "altitude": "altitude [deg]"}) - da = da.assign_coords( - { - "altitude": radians(da["altitude [deg]"]), - "azimuth": radians(da["azimuth [deg]"]), - } - ) + da = da.assign_coords({ + "altitude": radians(da["altitude [deg]"]), + "azimuth": radians(da["azimuth [deg]"]), + }) da = da.swap_dims({"altitude [deg]": "altitude", "azimuth [deg]": "azimuth"}) da = da.chunk("auto") @@ -225,6 +228,11 @@ def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> f ------- float Rated capacity per unit area or per panel, depending on the model. + + Raises + ------ + ValueError + If the panel model is unknown. 
""" if isinstance(panel, (str, Path)): panel = get_solarpanelconfig(panel) @@ -357,6 +365,11 @@ def _validate_turbine_config_dict( dict validated and potentially modified turbine config dict + Raises + ------ + ValueError + If the turbine config dict is missing required keys or has invalid values. + """ if not all(key in turbine for key in ("POW", "V", "P", "hub_height")): err_msg = ( @@ -438,6 +451,10 @@ def get_oedb_windturbineconfig( >>> get_oedb_windturbineconfig(name="E-53/800", manufacturer="Enercon") {'V': ..., 'POW': ..., ...} + Raises + ------ + RuntimeError + If no turbine or multiple turbines match the search. """ # Parse information of different allowed 'turbine' values if isinstance(search, int): @@ -544,6 +561,6 @@ def get_oedb_windturbineconfig( _oedb_turbines = None windturbines = arrowdict({p.stem: p for p in WINDTURBINE_DIRECTORY.glob("*.yaml")}) solarpanels = arrowdict({p.stem: p for p in SOLARPANEL_DIRECTORY.glob("*.yaml")}) -cspinstallations = arrowdict( - {p.stem: p for p in CSPINSTALLATION_DIRECTORY.glob("*.yaml")} -) +cspinstallations = arrowdict({ + p.stem: p for p in CSPINSTALLATION_DIRECTORY.glob("*.yaml") +}) diff --git a/atlite/utils.py b/atlite/utils.py index f2df1904..c3d26d82 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -35,6 +35,16 @@ def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> Dataset: """ Convert an old style cutout directory to new style netcdf file. + + Returns + ------- + xarray.Dataset + The migrated cutout data. + + Raises + ------ + MergeError + If automatic migration of multi-file datasets fails. """ old_cutout_dir = Path(old_cutout_dir) with xr.open_dataset(old_cutout_dir / "meta.nc") as meta: diff --git a/atlite/wind.py b/atlite/wind.py index 6b86f02f..47f7e154 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -57,8 +57,12 @@ def extrapolate_wind_speed( Raises ------ + AssertionError + If no wind speed variables are found in the dataset. 
RuntimeError - If the cutout is missing the data for the chosen `method` + If the cutout is missing the data for the chosen `method`. + ValueError + If ``method`` is not ``'logarithmic'`` or ``'power'``. References ---------- @@ -75,9 +79,9 @@ def extrapolate_wind_speed( return ds[to_name] if from_height is None: - heights: NDArray = np.asarray( - [int(str(s)[3:-1]) for s in ds if re.match(r"wnd\d+m", str(s))] - ) + heights: NDArray = np.asarray([ + int(str(s)[3:-1]) for s in ds if re.match(r"wnd\d+m", str(s)) + ]) if len(heights) == 0: raise AssertionError("Wind speed is not in dataset") @@ -113,14 +117,12 @@ def extrapolate_wind_speed( f"Interpolation method must be 'logarithmic' or 'power', but is: {method}" ) - wnd_spd.attrs.update( - { - "long name": ( - f"extrapolated {to_height} m wind speed using {method_desc} " - f" and {from_height} m wind speed" - ), - "units": "m s**-1", - } - ) + wnd_spd.attrs.update({ + "long name": ( + f"extrapolated {to_height} m wind speed using {method_desc} " + f" and {from_height} m wind speed" + ), + "units": "m s**-1", + }) return cast("DataArray", wnd_spd.rename(to_name)) diff --git a/doc/conf.py b/doc/conf.py index 3b3a61a9..770e7ea5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -33,7 +33,7 @@ # ones. extensions = [ "sphinx.ext.autodoc", - #'sphinx.ext.autosummary', + # 'sphinx.ext.autosummary', "sphinx.ext.intersphinx", "sphinx.ext.todo", "sphinx.ext.mathjax", @@ -41,8 +41,8 @@ "nbsphinx", "nbsphinx_link", # 'sphinx.ext.pngmath', - #'sphinxcontrib.tikz', - #'rinoh.frontend.sphinx', + # 'sphinxcontrib.tikz', + # 'rinoh.frontend.sphinx', "sphinx.ext.imgconverter", # for SVG conversion ] @@ -255,13 +255,13 @@ latex_elements: dict[str, str] = { # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', + # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', + # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. 
- #'preamble': '', + # 'preamble': '', # Latex figure (float) alignment - #'figure_align': 'htbp', + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples diff --git a/examples/historic-comparison-germany.ipynb b/examples/historic-comparison-germany.ipynb index f60cc169..37ac7a5e 100644 --- a/examples/historic-comparison-germany.ipynb +++ b/examples/historic-comparison-germany.ipynb @@ -261,31 +261,34 @@ "\n", " Parameters\n", " ----------\n", - " typ : str\n", - " Type of energy source, e.g. \"Solarstrom\" (PV), \"Windenergie\" (wind).\n", - " cap_range : (optional) list-like\n", - " Two entries, limiting the lower and upper range of capacities (in kW)\n", - " to include. Left-inclusive, right-exclusive.\n", - " until : str\n", - " String representation of a datetime object understood by pandas.to_datetime()\n", - " for limiting to installations existing until this datetime.\n", + " typ : str\n", + " Type of energy source, e.g. \"Solarstrom\" (PV), \"Windenergie\" (wind).\n", + " cap_range : (optional) list-like\n", + " Two entries, limiting the lower and upper range of capacities (in kW)\n", + " to include. 
Left-inclusive, right-exclusive.\n", + " until : str\n", + " String representation of a datetime object understood by pandas.to_datetime()\n", + " for limiting to installations existing until this datetime.\n", + "\n", + " Returns\n", + " -------\n", + " pandas.DataFrame\n", + " Filtered capacities.\n", "\n", " \"\"\"\n", "\n", " # Load locations of installed capacities and remove incomplete entries\n", - " cols = OrderedDict(\n", - " (\n", - " (\"installation_date\", 0),\n", - " (\"plz\", 2),\n", - " (\"city\", 3),\n", - " (\"type\", 6),\n", - " (\"capacity\", 8),\n", - " (\"level\", 9),\n", - " (\"lat\", 19),\n", - " (\"lon\", 20),\n", - " (\"validation\", 22),\n", - " )\n", - " )\n", + " cols = OrderedDict((\n", + " (\"installation_date\", 0),\n", + " (\"plz\", 2),\n", + " (\"city\", 3),\n", + " (\"type\", 6),\n", + " (\"capacity\", 8),\n", + " (\"level\", 9),\n", + " (\"lat\", 19),\n", + " (\"lon\", 20),\n", + " (\"validation\", 22),\n", + " ))\n", " database = pd.read_csv(\n", " \"eeg_anlagenregister_2015.08.utf8.csv\",\n", " sep=\";\",\n", @@ -455,9 +458,10 @@ ], "source": [ "compare = (\n", - " pd.DataFrame(\n", - " {\"atlite\": pv.squeeze().to_series(), \"opsd\": opsd[\"DE_solar_generation_actual\"]}\n", - " )\n", + " pd.DataFrame({\n", + " \"atlite\": pv.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " })\n", " / 1e3\n", ") # in GW\n", "compare.resample(\"1W\").mean().plot(figsize=(8, 5))\n", @@ -492,12 +496,10 @@ "source": [ "pv_opt = cutout.pv(panel=\"CSi\", orientation=\"latitude_optimal\", layout=solar_layout)\n", "compare_opt = (\n", - " pd.DataFrame(\n", - " {\n", - " \"atlite\": pv_opt.squeeze().to_series(),\n", - " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", - " }\n", - " )\n", + " pd.DataFrame({\n", + " \"atlite\": pv_opt.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " })\n", " / 1e3\n", ") # in GW\n", "compare_opt.resample(\"1W\").mean().plot(figsize=(8, 5))\n", 
@@ -1236,13 +1238,11 @@ }, "outputs": [], "source": [ - "compare = pd.DataFrame(\n", - " {\n", - " \"atlite\": wind[\"total\"].squeeze().to_series(),\n", - " \"< 1600 kW\": wind[\"< 1600.0 kW\"].squeeze().to_series(),\n", - " \"opsd\": opsd[\"DE_wind_generation_actual\"],\n", - " }\n", - ")\n", + "compare = pd.DataFrame({\n", + " \"atlite\": wind[\"total\"].squeeze().to_series(),\n", + " \"< 1600 kW\": wind[\"< 1600.0 kW\"].squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_wind_generation_actual\"],\n", + "})\n", "\n", "compare = compare / 1e3 # in GW" ] @@ -1400,7 +1400,8 @@ " filter(lambda r: r.attributes[\"iso_3166_2\"].startswith(\"DE\"), shp.records())\n", ")\n", "laender = (\n", - " gpd.GeoDataFrame([{**r.attributes, \"geometry\": r.geometry} for r in de_records])\n", + " gpd\n", + " .GeoDataFrame([{**r.attributes, \"geometry\": r.geometry} for r in de_records])\n", " .rename(columns={\"iso_3166_2\": \"state\"})\n", " .set_index(\"state\")\n", " .set_crs(4236)\n", diff --git a/pyproject.toml b/pyproject.toml index f97a62b2..dc815f69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ include = ["atlite"] [tool.ruff] extend-include = ['*.ipynb'] +preview = true [tool.ruff.lint] select = [ @@ -99,6 +100,7 @@ select = [ 'NPY', # numpy 'G', # flake8-logging-format 'PTH', # flake8-use-pathlib + 'DOC', # pydoclint: docstring-signature alignment 'RUF013', # ruff: implicit-optional 'RUF100', # ruff: unused-noqa ] @@ -125,10 +127,11 @@ ignore = [ 'D205', # 1 blank line required between summary line and description 'D400', # First line should end with a period 'D415', # First line should end with a period, question mark, or exclamation point - 'D417', # Missing argument descriptions in the docstring - ] +[tool.ruff.lint.pydocstyle] +convention = "numpy" + [tool.mypy] python_version = "3.11" warn_return_any = true diff --git a/test/test_preparation_and_conversion.py b/test/test_preparation_and_conversion.py index 56411eb2..3fe953e2 100644 --- 
a/test/test_preparation_and_conversion.py +++ b/test/test_preparation_and_conversion.py @@ -107,7 +107,8 @@ def pv_test(cutout, time=TIME, skip_optimal_sum_test=False): return_capacity=True, ) cap_per_region = ( - cells.assign(cap_factor=cap_factor.stack(spatial=["y", "x"]).values) + cells + .assign(cap_factor=cap_factor.stack(spatial=["y", "x"]).values) .groupby("regions") .cap_factor.sum() ) @@ -229,7 +230,7 @@ def csp_test(cutout): Test the atlite.Cutout.csp function with different for different settings and technologies. """ - ## Test technology = "solar tower" + # Test technology = "solar tower" st = cutout.csp(atlite.cspinstallations.SAM_solar_tower, capacity_factor=True) assert st.notnull().all() @@ -241,7 +242,7 @@ def csp_test(cutout): ll = cutout.csp(atlite.cspinstallations.lossless_installation) assert (st <= ll).all() - ## Test technology = "parabolic trough" + # Test technology = "parabolic trough" pt = cutout.csp(atlite.cspinstallations.SAM_parabolic_trough, capacity_factor=True) assert pt.notnull().all() @@ -511,9 +512,7 @@ def test_pv_tracking_era5(cutout_era5): @staticmethod def test_pv_era5_2days_crossing_months(cutout_era5_2days_crossing_months): - """ - See https://github.com/PyPSA/atlite/issues/256. - """ + """See https://github.com/PyPSA/atlite/issues/256.""" # noqa: DOC201 return pv_test(cutout_era5_2days_crossing_months, time="2013-03-01") @staticmethod @@ -533,6 +532,11 @@ def test_pv_era5_and_era5t(cutout_era5t): Note: the above page says that ERA5 data are made available with a *3* month delay, but experience shows that it's with a *2* month delay. Hence the test with previous vs. second-previous month. + + Returns + ------- + object + PV test result. 
""" today = date.today() first_day_this_month = today.replace(day=1) From 80d6cdbdeaceb670d21e00e53992d2cac7849be8 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 24 Mar 2026 10:17:50 +0100 Subject: [PATCH 11/27] Address PR review: add 'legacy' default, drop False, extract _aggregate_time helper, consistent keep_attrs, use fixtures in tests --- atlite/convert.py | 74 ++++++++++++++--------------- test/test_aggregate_time.py | 92 ++++++++++++++++--------------------- 2 files changed, 78 insertions(+), 88 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index 0a1a89cc..250cb785 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -47,6 +47,14 @@ from atlite.resource import TurbineConfig +def _aggregate_time(da: xr.DataArray, method: str | None) -> xr.DataArray: + if method == "sum": + return da.sum("time", keep_attrs=True) + elif method == "mean": + return da.mean("time", keep_attrs=True) + return da + + def convert_and_aggregate( cutout, convert_func, @@ -57,7 +65,7 @@ def convert_and_aggregate( shapes_crs=4326, per_unit=False, return_capacity=False, - aggregate_time: Literal["sum", "mean", False] | None = None, + aggregate_time: Literal["sum", "mean", "legacy"] | None = "legacy", capacity_factor=False, capacity_factor_timeseries=False, show_progress=False, @@ -93,17 +101,18 @@ def convert_and_aggregate( return_capacity : boolean Additionally returns the installed capacity at each bus corresponding to ``layout`` (defaults to False). - aggregate_time : "sum", "mean", False, or None + aggregate_time : "sum", "mean", "legacy", or None Controls temporal aggregation of results. ``"sum"`` sums over time, - ``"mean"`` averages over time, ``False`` returns full timeseries. - ``None`` keeps the historical default behavior: time-summed results + ``"mean"`` averages over time, ``None`` returns full timeseries. + ``"legacy"`` (default) preserves historical behavior: time-summed without spatial aggregation and full timeseries with spatial - aggregation. 
Replaces the deprecated ``capacity_factor`` and + aggregation; this option is deprecated and will be removed in a + future release. Replaces the deprecated ``capacity_factor`` and ``capacity_factor_timeseries`` parameters. capacity_factor : boolean Deprecated. Use ``aggregate_time="mean"`` instead. capacity_factor_timeseries : boolean - Deprecated. Use ``aggregate_time=False`` instead (which is the default). + Deprecated. Use ``aggregate_time=None`` instead (which is the default). show_progress : boolean, default False Whether to show a progress bar. dask_kwargs : dict, default {} @@ -126,16 +135,16 @@ def convert_and_aggregate( **Without aggregation** (none of the above given): - - ``aggregate_time=False``: per-cell timeseries ``(time, y, x)``. + - ``aggregate_time=None``: per-cell timeseries ``(time, y, x)``. - ``aggregate_time="mean"``: time-averaged per cell ``(y, x)``. - ``aggregate_time="sum"``: time-summed per cell ``(y, x)``. Legacy behavior (deprecated): + - ``aggregate_time="legacy"``: historical context-dependent default. - ``capacity_factor_timeseries=True``: equivalent to - ``aggregate_time=False``. + ``aggregate_time=None``. - ``capacity_factor=True``: equivalent to ``aggregate_time="mean"``. - - No flags: historical default behavior. units : xr.DataArray (optional) The installed units per bus in MW corresponding to ``layout`` @@ -147,21 +156,22 @@ def convert_and_aggregate( pv : Generate solar PV generation time-series. """ - if ( - aggregate_time is not None - and aggregate_time is not False - and aggregate_time - not in ( - "sum", - "mean", - ) - ): + if aggregate_time not in ("sum", "mean", "legacy", None): raise ValueError( - f"aggregate_time must be 'sum', 'mean', False, or None, got {aggregate_time!r}" + f"aggregate_time must be 'sum', 'mean', 'legacy', or None, " + f"got {aggregate_time!r}" + ) + + if aggregate_time == "legacy": + warnings.warn( + "aggregate_time='legacy' is deprecated and will be removed in a " + "future release. 
Pass 'sum', 'mean', or None explicitly.", + FutureWarning, + stacklevel=2, ) if capacity_factor or capacity_factor_timeseries: - if aggregate_time is not None and aggregate_time is not False: + if aggregate_time != "legacy": raise ValueError( "Cannot use 'aggregate_time' together with deprecated " "'capacity_factor' or 'capacity_factor_timeseries'." @@ -176,11 +186,11 @@ def convert_and_aggregate( if capacity_factor_timeseries: warnings.warn( "capacity_factor_timeseries is deprecated. " - "Use aggregate_time=False instead.", + "Use aggregate_time=None instead.", FutureWarning, stacklevel=2, ) - aggregate_time = False + aggregate_time = None func_name = convert_func.__name__.replace("convert_", "") logger.info(f"Convert and aggregate '{func_name}'.") @@ -195,13 +205,8 @@ def convert_and_aggregate( "given for `per_unit` or `return_capacity`" ) - effective_aggregate_time = "sum" if aggregate_time is None else aggregate_time - if effective_aggregate_time == "mean": - res = da.mean("time") - elif effective_aggregate_time == "sum": - res = da.sum("time", keep_attrs=True) - else: - res = da + agg = "sum" if aggregate_time == "legacy" else aggregate_time + res = _aggregate_time(da, agg) return maybe_progressbar(res, show_progress, **dask_kwargs) if matrix is not None: @@ -259,11 +264,8 @@ def convert_and_aggregate( else: results.attrs["units"] = "MW" - effective_aggregate_time = False if aggregate_time is None else aggregate_time - if effective_aggregate_time == "mean": - results = results.mean("time") - elif effective_aggregate_time == "sum": - results = results.sum("time", keep_attrs=True) + if aggregate_time != "legacy": + results = _aggregate_time(results, aggregate_time) if return_capacity: return maybe_progressbar(results, show_progress, **dask_kwargs), capacity @@ -715,7 +717,7 @@ def wind( Get per-cell capacity factor time series (no aggregation): >>> cf = cutout.wind(turbine="Vestas_V112_3MW", - ... aggregate_time=False) + ... 
aggregate_time=None) >>> cf.dims ('time', 'y', 'x') >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") @@ -899,7 +901,7 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) Get per-cell capacity factor time series (no aggregation): >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... aggregate_time=False) + ... aggregate_time=None) >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") References diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 8a3f4ee2..86f0f469 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -2,9 +2,8 @@ # # SPDX-License-Identifier: MIT -import warnings - import numpy as np +import pandas as pd import pytest import xarray as xr @@ -15,8 +14,6 @@ class MockCutout: def __init__(self, data): self.data = data grid_coords = np.array([(x, y) for y in data.y.values for x in data.x.values]) - import pandas as pd - self.grid = pd.DataFrame(grid_coords, columns=["x", "y"]) @@ -45,8 +42,8 @@ def cutout(): class TestAggregateTimeNoSpatial: - def test_aggregate_time_false_returns_timeseries(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time=False) + def test_aggregate_time_none_returns_timeseries(self, cutout): + result = convert_and_aggregate(cutout, identity_convert, aggregate_time=None) assert "time" in result.dims def test_aggregate_time_mean(self, cutout): @@ -61,57 +58,51 @@ def test_aggregate_time_sum(self, cutout): expected = cutout.data["var"].sum("time") np.testing.assert_allclose(result.values, expected.values) - def test_default_no_spatial_aggregates_over_time(self, cutout): - result = convert_and_aggregate(cutout, identity_convert) + def test_legacy_default_no_spatial_sums_over_time(self, cutout): + with pytest.warns(FutureWarning, match="aggregate_time='legacy'"): + result = convert_and_aggregate(cutout, identity_convert) expected = cutout.data["var"].sum("time") assert "time" not in 
result.dims xr.testing.assert_identical(result, expected) +@pytest.fixture +def layout(cutout): + return xr.DataArray( + np.ones((3, 4)), + dims=["y", "x"], + coords={"y": cutout.data.y, "x": cutout.data.x}, + ) + + +@pytest.fixture +def result_ts(cutout, layout): + return convert_and_aggregate( + cutout, identity_convert, layout=layout, aggregate_time=None + ) + + class TestAggregateTimeWithSpatial: - def test_aggregate_time_mean_with_layout(self, cutout): - layout = xr.DataArray( - np.ones((3, 4)), - dims=["y", "x"], - coords={"y": cutout.data.y, "x": cutout.data.x}, - ) - result_ts = convert_and_aggregate( - cutout, - identity_convert, - layout=layout, - aggregate_time=False, - ) + def test_aggregate_time_mean_with_layout(self, cutout, layout, result_ts): result_mean = convert_and_aggregate( - cutout, - identity_convert, - layout=layout, - aggregate_time="mean", + cutout, identity_convert, layout=layout, aggregate_time="mean" ) assert "time" in result_ts.dims assert "time" not in result_mean.dims np.testing.assert_allclose(result_mean.values, result_ts.mean("time").values) - def test_aggregate_time_sum_with_layout(self, cutout): - layout = xr.DataArray( - np.ones((3, 4)), - dims=["y", "x"], - coords={"y": cutout.data.y, "x": cutout.data.x}, - ) - result_ts = convert_and_aggregate( - cutout, - identity_convert, - layout=layout, - aggregate_time=False, - ) + def test_aggregate_time_sum_with_layout(self, cutout, layout, result_ts): result_sum = convert_and_aggregate( - cutout, - identity_convert, - layout=layout, - aggregate_time="sum", + cutout, identity_convert, layout=layout, aggregate_time="sum" ) assert "time" not in result_sum.dims np.testing.assert_allclose(result_sum.values, result_ts.sum("time").values) + def test_legacy_default_with_layout_returns_timeseries(self, cutout, layout): + with pytest.warns(FutureWarning, match="aggregate_time='legacy'"): + result = convert_and_aggregate(cutout, identity_convert, layout=layout) + assert "time" in result.dims + 
def test_aggregate_time_with_per_unit(self, cutout): layout = xr.DataArray( np.ones((3, 4)) * 2.0, @@ -132,33 +123,26 @@ def test_aggregate_time_with_per_unit(self, cutout): identity_convert, layout=layout, per_unit=True, - aggregate_time=False, + aggregate_time=None, ) np.testing.assert_allclose(result_pu.values, result_pu_ts.mean("time").values) class TestDeprecatedParams: def test_capacity_factor_warns(self, cutout): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") + with pytest.warns(FutureWarning, match="capacity_factor is deprecated"): result = convert_and_aggregate( cutout, identity_convert, capacity_factor=True ) - assert any( - "capacity_factor is deprecated" in str(warning.message) for warning in w - ) assert "time" not in result.dims def test_capacity_factor_timeseries_warns(self, cutout): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") + with pytest.warns( + FutureWarning, match="capacity_factor_timeseries is deprecated" + ): result = convert_and_aggregate( cutout, identity_convert, capacity_factor_timeseries=True ) - assert any( - "capacity_factor_timeseries is deprecated" in str(warning.message) - for warning in w - ) assert "time" in result.dims def test_capacity_factor_with_aggregate_time_raises(self, cutout): @@ -176,6 +160,10 @@ def test_invalid_aggregate_time_value(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") + def test_aggregate_time_false_raises(self, cutout): + with pytest.raises(ValueError, match="aggregate_time must be"): + convert_and_aggregate(cutout, identity_convert, aggregate_time=False) + def test_aggregate_time_true_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): convert_and_aggregate(cutout, identity_convert, aggregate_time=True) From 5a2fba925f13eb1370fb5ebba8cc9a492282eb34 Mon Sep 17 00:00:00 2001 From: Fabian Date: 
Tue, 24 Mar 2026 11:08:09 +0100 Subject: [PATCH 12/27] update release notes --- RELEASE_NOTES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index ffb4a504..ccdf483b 100755 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -18,7 +18,7 @@ Upcoming Release ``pip install git+https://github.com/pypsa/atlite``. -* Add ``aggregate_time={"sum", "mean", False}`` to ``convert_and_aggregate`` for temporal +* Add ``aggregate_time={"sum", "mean", None}`` to ``convert_and_aggregate`` for temporal aggregation with and without spatial aggregation, and deprecate ``capacity_factor``/``capacity_factor_timeseries`` in favor of it From 0f1d4643d3a6b722a40d77f35c90b78275f28946 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:15:29 +0000 Subject: [PATCH 13/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- atlite/datasets/era5.py | 12 ++-- atlite/hydro.py | 9 ++- .../building_stock_weather_aggregation.ipynb | 12 ++-- examples/plotting_with_atlite.ipynb | 6 +- examples/solarpv_tracking_options.ipynb | 6 +- examples/working-with-csp.ipynb | 68 +++++++++---------- 6 files changed, 57 insertions(+), 56 deletions(-) diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 36287ca6..34e4be61 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -178,13 +178,11 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ) ds = _rename_and_clean_coords(ds) - return ds.rename( - { - "t2m": "temperature", - "stl4": "soil temperature", - "d2m": "dewpoint temperature", - } - ) + return ds.rename({ + "t2m": "temperature", + "stl4": "soil temperature", + "d2m": "dewpoint temperature", + }) def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: diff --git a/atlite/hydro.py b/atlite/hydro.py index e1f9818e..47e3c37e 100644 --- 
a/atlite/hydro.py +++ b/atlite/hydro.py @@ -112,9 +112,12 @@ def determine_basins( f"but received `type(hydrobasins) = {type(hydrobasins)}`" ) - missing_columns = pd.Index( - ["HYBAS_ID", "DIST_MAIN", "NEXT_DOWN", "geometry"] - ).difference(hydrobasins.columns) + missing_columns = pd.Index([ + "HYBAS_ID", + "DIST_MAIN", + "NEXT_DOWN", + "geometry", + ]).difference(hydrobasins.columns) assert missing_columns.empty, ( "Couldn't find the column(s) {} in the hydrobasins dataset.".format( ", ".join(missing_columns) diff --git a/examples/building_stock_weather_aggregation.ipynb b/examples/building_stock_weather_aggregation.ipynb index fafc2373..727847c5 100644 --- a/examples/building_stock_weather_aggregation.ipynb +++ b/examples/building_stock_weather_aggregation.ipynb @@ -307,7 +307,8 @@ "# Clip the raster data and reproject the result back into EPSG:4326 to match the cutout,\n", "# also remove some unnecessary dimensions via `squeeze()`\n", "layout = (\n", - " population_density.rio.clip(\n", + " population_density.rio\n", + " .clip(\n", " finland_3035.geometry, from_disk=True\n", " ) # Clip the population density raster data with the reprojected Finland shape.\n", " .rio.reproject( # Reproject and resample the population density raster to match the cutout.\n", @@ -445,7 +446,8 @@ "dirs = {\"north\": 0.0, \"east\": 90.0, \"south\": 180.0, \"west\": 270.0}\n", "for name, lout in layouts.items():\n", " irr_total[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az}, layout=lout.fillna(0.0)\n", " )\n", " .squeeze()\n", @@ -453,7 +455,8 @@ " for d, az in dirs.items()\n", " }\n", " irr_direct[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az},\n", " layout=lout.fillna(0.0),\n", " irradiation=\"direct\",\n", @@ -463,7 +466,8 @@ " for d, az in dirs.items()\n", " }\n", " irr_diffuse[name] = {\n", - " d: 
cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az},\n", " layout=lout.fillna(0.0),\n", " irradiation=\"diffuse\",\n", diff --git a/examples/plotting_with_atlite.ipynb b/examples/plotting_with_atlite.ipynb index f3031235..d69cbfa8 100644 --- a/examples/plotting_with_atlite.ipynb +++ b/examples/plotting_with_atlite.ipynb @@ -401,7 +401,8 @@ "cells_generation = sites.merge(cells, how=\"inner\").rename(pd.Series(sites.index))\n", "\n", "layout = (\n", - " xr.DataArray(cells_generation.set_index([\"y\", \"x\"]).capacity.unstack())\n", + " xr\n", + " .DataArray(cells_generation.set_index([\"y\", \"x\"]).capacity.unstack())\n", " .reindex_like(cap_factors)\n", " .rename(\"Installed Capacity [MW]\")\n", ")\n", @@ -519,7 +520,8 @@ " spine.set_edgecolor(\"white\")\n", "\n", "power_generation = (\n", - " cutout.wind(\"Vestas_V112_3MW\", layout=layout.fillna(0), shapes=UkIr)\n", + " cutout\n", + " .wind(\"Vestas_V112_3MW\", layout=layout.fillna(0), shapes=UkIr)\n", " .to_pandas()\n", " .rename_axis(index=\"\", columns=\"shapes\")\n", ")\n", diff --git a/examples/solarpv_tracking_options.ipynb b/examples/solarpv_tracking_options.ipynb index 436c39a7..50a49205 100644 --- a/examples/solarpv_tracking_options.ipynb +++ b/examples/solarpv_tracking_options.ipynb @@ -408,9 +408,9 @@ "source": [ "day_profiles = [ds.loc[day, point].squeeze() for ds in data]\n", "\n", - "df = pd.DataFrame(\n", - " {k: v.to_series() for k, v in zip(labels, day_profiles, strict=False)}\n", - ")\n", + "df = pd.DataFrame({\n", + " k: v.to_series() for k, v in zip(labels, day_profiles, strict=False)\n", + "})\n", "df.plot(figsize=(10, 5))\n", "plt.title(\"PV Tracking: Portugal @(-9°, 40°), May 1, 2019\")" ] diff --git a/examples/working-with-csp.ipynb b/examples/working-with-csp.ipynb index 4795e0e1..f34c1f06 100644 --- a/examples/working-with-csp.ipynb +++ b/examples/working-with-csp.ipynb @@ -588,18 +588,14 @@ ], "source": [ "# Calculate 
time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout.csp(\n", - " installation=\"lossless_installation\",\n", - " technology=\"solar tower\",\n", - " layout=layout,\n", - " ).rename(\"lossless_installation\"),\n", - " cutout.csp(installation=\"SAM_solar_tower\", layout=layout).rename(\n", - " \"SAM_solar_tower\"\n", - " ),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout.csp(\n", + " installation=\"lossless_installation\",\n", + " technology=\"solar tower\",\n", + " layout=layout,\n", + " ).rename(\"lossless_installation\"),\n", + " cutout.csp(installation=\"SAM_solar_tower\", layout=layout).rename(\"SAM_solar_tower\"),\n", + "])\n", "\n", "# Load reference time-series from file\n", "df = pd.read_csv(\"../profiles_and_efficiencies_from_sam/ST-salt_time-series_spain.csv\")\n", @@ -769,18 +765,16 @@ "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power\n", "\n", "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout.csp(\n", - " installation=\"lossless_installation\",\n", - " technology=\"parabolic trough\",\n", - " layout=layout,\n", - " ).rename(\"lossless_installation\"),\n", - " cutout.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", - " \"SAM_parabolic_trough\"\n", - " ),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout.csp(\n", + " installation=\"lossless_installation\",\n", + " technology=\"parabolic trough\",\n", + " layout=layout,\n", + " ).rename(\"lossless_installation\"),\n", + " cutout.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", + " \"SAM_parabolic_trough\"\n", + " ),\n", + "])\n", "\n", "# Load reference time-series from file\n", "df = pd.read_csv(\n", @@ -953,17 +947,15 @@ ], "source": [ "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", 
- " [\n", - " cutout_sarah.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", - " \"SARAH\"\n", - " ),\n", - " cutout.csp(\n", - " installation=\"SAM_parabolic_trough\",\n", - " layout=layout,\n", - " ).rename(\"ERA5\"),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout_sarah.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", + " \"SARAH\"\n", + " ),\n", + " cutout.csp(\n", + " installation=\"SAM_parabolic_trough\",\n", + " layout=layout,\n", + " ).rename(\"ERA5\"),\n", + "])\n", "\n", "# Load reference NREL SAM time-series from file\n", "df = pd.read_csv(\n", @@ -1189,14 +1181,16 @@ "# Interpolate values to a finer grid and fill missing values by extrapolation\n", "# Order is relevant: Start with Azimuth (where we have sufficient values) and then continue with altitude\n", "da = (\n", - " da.interpolate_na(\"azimuth\")\n", + " da\n", + " .interpolate_na(\"azimuth\")\n", " .interpolate_na(\"altitude\")\n", " .interpolate_na(\"azimuth\", fill_value=\"extrapolate\")\n", ")\n", "\n", "# Use rolling horizon to smooth values, average over 3x3 adjacent values per pixel\n", "da = (\n", - " da.rolling(azimuth=3, altitude=3)\n", + " da\n", + " .rolling(azimuth=3, altitude=3)\n", " .mean()\n", " .interpolate_na(\"altitude\", fill_value=\"extrapolate\")\n", " .interpolate_na(\"azimuth\", fill_value=\"extrapolate\")\n", From dfdcc97927fc55ab9123b46552896623d03992bb Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 26 Mar 2026 12:42:12 +0100 Subject: [PATCH 14/27] Add docstrings to era5/convert modules and replace str with Literal types across codebase - Add NumPy-style docstrings to all functions in datasets/era5.py and improve convert.py docstrings - Define shared Literal type aliases (TrackingType, ClearskyModel, TrigonModel, etc.) 
in _types.py - Update ~30 function signatures across convert.py, pv/, and data.py to use Literal types - Fix CSPConfig TypedDict missing 'technology' key and widen orientation/panel param types --- atlite/__init__.py | 2 +- atlite/_types.py | 104 +------- atlite/convert.py | 530 ++++++++++++++++++++++++++------------- atlite/data.py | 6 +- atlite/datasets/era5.py | 291 +++++++++++++++++++++ atlite/gis.py | 16 +- atlite/pv/irradiation.py | 22 +- atlite/pv/orientation.py | 23 +- atlite/resource.py | 1 + atlite/utils.py | 49 +++- 10 files changed, 740 insertions(+), 304 deletions(-) diff --git a/atlite/__init__.py b/atlite/__init__.py index e05ca56f..03a78626 100644 --- a/atlite/__init__.py +++ b/atlite/__init__.py @@ -33,7 +33,7 @@ __version__ = version("atlite") # e.g. "0.17.0" # TODO, in the network structure it should use the dev version match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__) -assert match, f"Could not determine release_version of pypsa: {__version__}" +assert match, f"Could not determine release_version of atlite: {__version__}" release_version = match.group(0) __all__ = [ diff --git a/atlite/_types.py b/atlite/_types.py index 3338d665..98bb6a00 100644 --- a/atlite/_types.py +++ b/atlite/_types.py @@ -5,7 +5,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal, TypeAlias, TypedDict +from typing import Any, Literal, TypeAlias, TypedDict import geopandas as gpd import numpy as np @@ -14,9 +14,6 @@ from pyproj import CRS from shapely.geometry.base import BaseGeometry -if TYPE_CHECKING: - from collections.abc import Callable, Sequence - NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] NDArrayInt: TypeAlias = np.ndarray[Any, np.dtype[np.signedinteger[Any]]] NDArrayBool: TypeAlias = np.ndarray[Any, np.dtype[np.bool_]] @@ -31,37 +28,15 @@ CrsLike: TypeAlias = str | int | CRS | dict[str, Any] | None SparseMatrix: TypeAlias = sp.lil_matrix | sp.csr_matrix - -class 
CutoutPrepareConfig(TypedDict, total=False): - datasets: list[str] - months: list[int] - start_year: int - end_year: int - - -class DatasetConfig(TypedDict, total=False): - module: str - version: str - years: list[int] - - -class ConversionConfig(TypedDict, total=False): - data_source: str - temperature: bool - wind_speed: bool - solar_irradiance: bool - - -class PVConfig(TypedDict, total=False): - tracking: Literal["fixed", "horizontal", "vertical", "two_axis"] - orientation: Literal["south", "fixed"] - tilt: float | None - azimuth: float | None - racking: Literal[ - "open_rack_cell_glued_back", - "close_mount_cell_glued_back", - "open_rack_polymer_thinfilm_copper_covered_edge", - ] +TrackingType: TypeAlias = ( + Literal["horizontal", "tilted_horizontal", "vertical", "dual"] | None +) +ClearskyModel: TypeAlias = Literal["simple", "enhanced"] +TrigonModel: TypeAlias = Literal["simple", "perez"] +IrradiationType: TypeAlias = Literal["total", "direct", "diffuse", "ground"] +HeatPumpSource: TypeAlias = Literal["air", "soil"] +OrientationName: TypeAlias = Literal["latitude_optimal", "constant", "latitude"] +DataFormat: TypeAlias = Literal["grib", "netcdf"] class ERA5RetrievalParams(TypedDict, total=False): @@ -77,62 +52,3 @@ class ERA5RetrievalParams(TypedDict, total=False): day: list[str] | str time: str | list[str] variable: str | list[str] - - -class SarahCreationParams(TypedDict, total=False): - sarah_dir: str | Path - parallel: bool - sarah_interpolate: bool - - -class GebcoCreationParams(TypedDict, total=False): - gebco_path: str | Path - - -class TaskDict(TypedDict, total=False): - prepare_func: Callable[..., Any] - xs: Any - ys: Any - yearmonths: list[tuple[int, int]] - fn: str | Path - year: int - month: int | list[int] - yearmonth: tuple[int, int] - engine: str - oldname: str - newname: str - template: str - drop_time_vars: bool - - -class CSPConfig(TypedDict, total=False): - turbine: str - capacity: float - - -class WindConfig(TypedDict, total=False): - 
turbine: str - capacity: float - hub_height: float | None - - -class LayoutConfig(TypedDict, total=False): - layout: DataArray | None - capacity: float | None - - -class ShapeConfig(TypedDict, total=False): - shapes: Sequence[Geometry] | None - shapes_crs: CrsLike - - -class AggregationConfig(TypedDict, total=False): - matrix: SparseMatrix | DataArray | None - index: Any - per_unit: bool - return_capacity: bool - aggregate_time: Literal["sum", "mean"] | bool | None - capacity_factor: bool - capacity_factor_timeseries: bool - show_progress: bool - dask_kwargs: dict[str, Any] diff --git a/atlite/convert.py b/atlite/convert.py index 5ea4fb16..169d6d64 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -46,12 +46,24 @@ if TYPE_CHECKING: from collections.abc import Callable - from atlite._types import DataArray, Dataset, NumericArray + from atlite._types import ( + ClearskyModel, + DataArray, + Dataset, + HeatPumpSource, + IrradiationType, + NumericArray, + OrientationName, + TrackingType, + TrigonModel, + ) from atlite.cutout import Cutout - from atlite.resource import TurbineConfig + from atlite.resource import CSPConfig, PanelConfig, TurbineConfig -def _aggregate_time(da: xr.DataArray, method: str | None) -> xr.DataArray: +def _aggregate_time( + da: xr.DataArray, method: Literal["sum", "mean"] | None +) -> xr.DataArray: if method == "sum": return da.sum("time", keep_attrs=True) if method == "mean": @@ -331,6 +343,27 @@ def convert_temperature(ds: Dataset) -> DataArray: def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + """ + Return ambient air temperature converted from Kelvin to degrees Celsius. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Ambient temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. 
+ """ return cutout.convert_and_aggregate(convert_func=convert_temperature, **params) @@ -358,6 +391,30 @@ def convert_soil_temperature(ds: Dataset) -> DataArray: def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + """ + Return soil temperature converted from Kelvin to degrees Celsius. + + Sea grid cells, where soil temperature is undefined, are filled with 0.0 + so they do not contribute during spatial aggregation. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Soil temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + """ return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) @@ -381,6 +438,27 @@ def convert_dewpoint_temperature(ds: Dataset) -> DataArray: def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: + """ + Return dew point temperature converted from Kelvin to degrees Celsius. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Dew point temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. 
+ """ return cutout.convert_and_aggregate( convert_func=convert_dewpoint_temperature, **params ) @@ -388,7 +466,7 @@ def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericAr def convert_coefficient_of_performance( ds: Dataset, - source: str, + source: HeatPumpSource, sink_T: float, c0: float | None, c1: float | None, @@ -442,7 +520,7 @@ def convert_coefficient_of_performance( def coefficient_of_performance( cutout: Cutout, - source: str = "air", + source: HeatPumpSource = "air", sink_T: float = 55.0, c0: float | None = None, c1: float | None = None, @@ -451,34 +529,48 @@ def coefficient_of_performance( ) -> DataArray | NumericArray: """ Convert ambient or soil temperature to coefficient of performance (COP) of - air- or ground-sourced heat pumps. The COP is a function of temperature - difference from source to sink. The defaults for either source (c0, c1, c2) - are based on a quadratic regression in [1]. + air- or ground-sourced heat pumps. + + The COP is modelled as a quadratic function of the temperature difference + ``dT = sink_T - source_T``: ``COP = c0 + c1 * dT + c2 * dT**2``. Parameters ---------- cutout : atlite.Cutout The cutout to process. - source : str - The heat source. Can be 'air' or 'soil'. - sink_T : float - The temperature of the heat sink. - c0 : float - The constant regression coefficient for the temperature difference. - c1 : float - The linear regression coefficient for the temperature difference. - c2 : float - The quadratic regression coefficient for the temperature difference. - **params - Additional keyword arguments passed to `convert_and_aggregate`. + source : {"air", "soil"} + Heat source type. Default coefficients per source, from [1]: + + - ``"air"``: ``c0=6.81, c1=-0.121, c2=0.000630`` + - ``"soil"``: ``c0=8.77, c1=-0.150, c2=0.000734`` + sink_T : float, default 55.0 + Heat sink temperature in °C. + c0 : float or None + Constant regression coefficient. If ``None``, uses source default.
+ c1 : float or None + Linear regression coefficient. If ``None``, uses source default. + c2 : float or None + Quadratic regression coefficient. If ``None``, uses source default. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. Returns ------- xr.DataArray - Coefficient of performance time-series. + Coefficient of performance time-series (dimensionless). - Reference - --------- + See Also + -------- + heat_demand : Compute heating degree-day demand. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + + References + ---------- [1] Staffell, Brett, Brandon, Hawkes, A review of domestic heat pumps, Energy & Environmental Science (2012), 5, 9291-9306, https://doi.org/10.1039/C2EE22653G. @@ -521,7 +613,12 @@ def convert_heat_demand( Returns ------- xr.DataArray - Daily heat demand. + Daily heat demand in degree-day-like units. + + Notes + ----- + The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. """ # Temperature is in Kelvin; take daily average T = ds["temperature"] @@ -550,52 +647,52 @@ def heat_demand( Convert outside temperature into daily heat demand using the degree-day approximation. - Since "daily average temperature" means different things in - different time zones and since xarray coordinates do not handle - time zones gracefully like pd.DateTimeIndex, you can provide an - hour_shift to redefine when the day starts. + The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. Output is in + degree-day-like units (scaled by *a*). - E.g. for Moscow in winter, hour_shift = 4, for New York in winter, - hour_shift = -5 - - This time shift applies across the entire spatial scope of ds for - all times. More fine-grained control will be built in a some - point, i.e. space- and time-dependent time zones. 
- - WARNING: Because the original data is provided every month, at the - month boundaries there is untidiness if you use a time shift. The - resulting xarray will have duplicates in the index for the parts - of the day in each month at the boundary. You will have to - re-average these based on the number of hours in each month for - the duplicated day. + Since "daily average temperature" means different things in different time + zones, you can provide *hour_shift* to redefine when the day starts. + E.g. for Moscow in winter ``hour_shift=4``, for New York ``hour_shift=-5``. + The shift applies uniformly across all grid cells and times. Parameters ---------- cutout : atlite.Cutout The cutout to process. - threshold : float - Outside temperature in degrees Celsius above which there is no - heat demand. - a : float + threshold : float, default 15.0 + Outside temperature in °C above which there is no heat demand. + a : float, default 1.0 Linear factor relating heat demand to outside temperature. - constant : float - Constant part of heat demand that does not depend on outside - temperature (e.g. due to water heating). - hour_shift : float - Time shift relative to UTC for taking daily average + constant : float, default 0.0 + Constant part of heat demand independent of outside temperature + (e.g. water heating). + hour_shift : float, default 0.0 + Time shift in hours relative to UTC for daily averaging. **params : Any - Additional keyword arguments passed to `convert_and_aggregate`. + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. Returns ------- xr.DataArray - Heat demand time-series. + Daily heat demand time-series in degree-day-like units. + + Warnings + -------- + Because the original data is provided per month, at month boundaries + there is untidiness when using a time shift. The resulting array will + have duplicate indices for parts of the day at each boundary. You must + re-average these based on the number of hours in each month. 
+ + See Also + -------- + cooling_demand : Degree-day cooling demand. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. """ return cutout.convert_and_aggregate( convert_func=convert_heat_demand, @@ -634,7 +731,12 @@ def convert_cooling_demand( Returns ------- xr.DataArray - Daily cooling demand. + Daily cooling demand in degree-day-like units. + + Notes + ----- + The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. """ # Temperature is in Kelvin; take daily average T = ds["temperature"] @@ -663,55 +765,54 @@ def cooling_demand( Convert outside temperature into daily cooling demand using the degree-day approximation. - Since "daily average temperature" means different things in - different time zones and since xarray coordinates do not handle - time zones gracefully like pd.DateTimeIndex, you can provide an - hour_shift to redefine when the day starts. + The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. Output is in + degree-day-like units (scaled by *a*). - E.g. for Moscow in summer, hour_shift = 3, for New York in summer, - hour_shift = -4 - - This time shift applies across the entire spatial scope of ds for - all times. More fine-grained control will be built in a some - point, i.e. space- and time-dependent time zones. - - WARNING: Because the original data is provided every month, at the - month boundaries there is untidiness if you use a time shift. The - resulting xarray will have duplicates in the index for the parts - of the day in each month at the boundary. You will have to - re-average these based on the number of hours in each month for - the duplicated day. 
+ Since "daily average temperature" means different things in different time + zones, you can provide *hour_shift* to redefine when the day starts. + E.g. for Moscow in summer ``hour_shift=3``, for New York ``hour_shift=-4``. + The shift applies uniformly across all grid cells and times. Parameters ---------- cutout : atlite.Cutout The cutout to process. - threshold : float - Outside temperature in degrees Celsius below which there is no - cooling demand. The default 23C is taken as a more liberal - estimation following European computational practices - (e.g. UK Met Office and European commission take as thresholds - 22C and 24C, respectively) - a : float + threshold : float, default 23.0 + Outside temperature in °C below which there is no cooling demand. + The default follows European computational practices (UK Met Office + uses 22 °C, European Commission uses 24 °C). + a : float, default 1.0 Linear factor relating cooling demand to outside temperature. - constant : float - Constant part of cooling demand that does not depend on outside - temperature (e.g. due to ventilation). - hour_shift : float - Time shift relative to UTC for taking daily average + constant : float, default 0.0 + Constant part of cooling demand independent of outside temperature + (e.g. ventilation). + hour_shift : float, default 0.0 + Time shift in hours relative to UTC for daily averaging. **params : Any - Additional keyword arguments passed to `convert_and_aggregate`. + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. Returns ------- xr.DataArray - Cooling demand time-series. + Daily cooling demand time-series in degree-day-like units. + + Warnings + -------- + Because the original data is provided per month, at month boundaries + there is untidiness when using a time shift. The resulting array will + have duplicate indices for parts of the day at each boundary. You must + re-average these based on the number of hours in each month. 
+ + See Also + -------- + heat_demand : Degree-day heating demand. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. """ return cutout.convert_and_aggregate( convert_func=convert_cooling_demand, @@ -727,8 +828,8 @@ def cooling_demand( def convert_solar_thermal( ds: Dataset, orientation: Callable, - trigon_model: str, - clearsky_model: str | None, + trigon_model: TrigonModel, + clearsky_model: ClearskyModel | None, c0: float, c1: float, t_store: float, @@ -754,7 +855,12 @@ def convert_solar_thermal( Returns ------- xr.DataArray - Specific solar thermal output. + Specific solar thermal output in W/m². + + Notes + ----- + Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G`` where + *G* is the tilted irradiation. Output is ``max(0, G * eta)``. """ # convert storage temperature to Kelvin in line with reanalysis data t_store += 273.15 @@ -780,9 +886,9 @@ def convert_solar_thermal( def solar_thermal( cutout: Cutout, - orientation: dict[str, float] | None = None, - trigon_model: str = "simple", - clearsky_model: str = "simple", + orientation: OrientationName | dict[str, float] | Callable | None = None, + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel = "simple", c0: float = 0.8, c1: float = 3.0, t_store: float = 80.0, @@ -792,47 +898,58 @@ def solar_thermal( Convert downward short-wave radiation flux and outside temperature into time series for solar thermal collectors. - Mathematical model and defaults for c0, c1 based on model in [1]. + Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G``. + Mathematical model and defaults for *c0*, *c1* based on [1]. Parameters ---------- cutout : atlite.Cutout The cutout to process. - orientation : dict or str or function - Panel orientation with slope and azimuth (units of degrees), or - 'latitude_optimal'. 
- trigon_model : str - Type of trigonometry model - clearsky_model : str or None - Type of clearsky model for diffuse irradiation. Either - 'simple' or 'enhanced'. - c0, c1 : float - Parameters for model in [1] (defaults to 0.8 and 3., respectively) - t_store : float - Store temperature in degree Celsius + orientation : dict, str, or callable, optional + Panel orientation. A dict with ``'slope'`` and ``'azimuth'`` keys + in degrees, the string ``'latitude_optimal'``, or a callable with + the same signature as callbacks from + ``atlite.pv.orientation.make_*``. Default: ``{'slope': 45.0, + 'azimuth': 180.0}``. + trigon_model : {"simple", "perez"}, default "simple" + Trigonometric model for tilted irradiation decomposition. + clearsky_model : {"simple", "enhanced"} or None, default "simple" + Clear-sky model for diffuse irradiation. ``'enhanced'`` also uses + ambient temperature and relative humidity. + c0 : float, default 0.8 + Optical efficiency parameter. + c1 : float, default 3.0 + Thermal loss coefficient in W/(m² K). + t_store : float, default 80.0 + Storage temperature in °C. **params : Any - Additional keyword arguments passed to `convert_and_aggregate`. + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. Returns ------- xr.DataArray Solar thermal generation time-series. + See Also + -------- + pv : Photovoltaic generation. + irradiation : Tilted surface irradiation. + Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. 
References ---------- [1] Henning and Palzer, Renewable and Sustainable Energy Reviews 30 (2014) 1003-1018 - """ if orientation is None: orientation = {"slope": 45.0, "azimuth": 180.0} if not callable(orientation): - orientation = get_orientation(orientation) # type: ignore[assignment] + orientation = get_orientation(orientation) return cutout.convert_and_aggregate( convert_func=convert_solar_thermal, @@ -935,13 +1052,18 @@ def wind( Returns ------- resource : xr.DataArray - Wind generation time-series. See :py:func:`convert_and_aggregate` - for the return value depending on the aggregation arguments. + Wind generation time-series. Without aggregation, values are capacity + factors (MWh/MWp). With aggregation and ``per_unit=False``, values are + in MW. See :py:func:`convert_and_aggregate` for details. + + See Also + -------- + pv : Photovoltaic generation. Note ---- - You can also specify all of the general conversion arguments - documented in the :py:func:`convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -981,13 +1103,13 @@ def wind( # irradiation def convert_irradiation( - ds, - orientation, - tracking=None, - irradiation="total", - trigon_model="simple", - clearsky_model="simple", -): + ds: Dataset, + orientation: Callable, + tracking: TrackingType = None, + irradiation: IrradiationType = "total", + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel | None = "simple", +) -> DataArray: """ Convert weather data to irradiation on a tilted surface. @@ -1009,7 +1131,7 @@ def convert_irradiation( Returns ------- xr.DataArray - Tilted surface irradiation. + Tilted surface irradiation in W/m². 
""" solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) @@ -1025,13 +1147,13 @@ def convert_irradiation( def irradiation( - cutout, - orientation, - irradiation="total", - tracking=None, - clearsky_model=None, - **params, -): + cutout: Cutout, + orientation: OrientationName | dict[str, float] | Callable, + irradiation: IrradiationType = "total", + tracking: TrackingType = None, + clearsky_model: ClearskyModel | None = None, + **params: Any, +) -> DataArray | NumericArray: """ Calculate the total, direct, diffuse, or ground irradiation on a tilted surface. @@ -1070,19 +1192,26 @@ def irradiation( Returns ------- irradiation : xr.DataArray - The desired irradiation quantity on the tilted surface. Defaults to - "total". + Irradiation on the tilted surface in W/m². + + See Also + -------- + pv : Photovoltaic generation. + solar_thermal : Solar thermal collector output. + + Notes + ----- + The ``trigon_model`` is fixed to ``'simple'`` internally. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- [1] D.T. Reindl, W.A. Beckman, and J.A. Duffie. Diffuse fraction correla- tions. Solar Energy, 45(1):1 – 7, 1990. - """ if not callable(orientation): orientation = get_orientation(orientation) @@ -1099,8 +1228,13 @@ def irradiation( # solar PV def convert_pv( - ds, panel, orientation, tracking, trigon_model="simple", clearsky_model="simple" -): + ds: Dataset, + panel: dict[str, Any], + orientation: Callable, + tracking: TrackingType, + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel | None = "simple", +) -> DataArray: """ Convert weather data to photovoltaic specific generation. 
@@ -1122,7 +1256,7 @@ def convert_pv( Returns ------- xr.DataArray - PV power output as specific yield per unit of installed capacity. + PV power output as capacity factors (unitless, 0–1). """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) @@ -1137,7 +1271,14 @@ def convert_pv( return SolarPanelModel(ds, irradiation, panel) -def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params): +def pv( + cutout: Cutout, + panel: str | PanelConfig, + orientation: OrientationName | dict[str, float] | Callable, + tracking: TrackingType = None, + clearsky_model: ClearskyModel | None = None, + **params: Any, +) -> DataArray | NumericArray: """ Convert downward-shortwave, upward-shortwave radiation flux and ambient temperature into a pv generation time-series. @@ -1148,8 +1289,8 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) The cutout to process. panel : str or dict Panel config dictionary with the parameters for the electrical - model in [3]. Alternatively, name of yaml file stored in - atlite.config.solarpanel_dir. + model in [3]. Alternatively, a name accepted by + :py:func:`atlite.resource.get_solarpanelconfig`. orientation : str, dict or callback Panel orientation can be chosen from either 'latitude_optimal', a constant orientation {'slope': 0.0, @@ -1173,14 +1314,20 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) Returns ------- pv : xr.DataArray - PV generation time-series. See :py:func:`convert_and_aggregate` - for the return value depending on the aggregation arguments. + PV generation time-series. Without aggregation, values are capacity + factors (unitless, 0–1). With aggregation and ``per_unit=False``, + values are in MW. See :py:func:`convert_and_aggregate` for details. + + See Also + -------- + wind : Wind generation. + irradiation : Tilted surface irradiation. 
+ solar_thermal : Solar thermal collector output. Note ---- - You can also specify all of the general conversion arguments - documented in the :py:func:`convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -1239,7 +1386,7 @@ def convert_csp(ds, installation): Returns ------- xr.DataArray - CSP output as specific yield per unit of reference capacity. + CSP output as specific yield (kWh/kW_ref), clipped to [0, 1]. Raises ------ @@ -1277,7 +1424,12 @@ def convert_csp(ds, installation): return da.rename("specific generation") -def csp(cutout, installation, technology=None, **params): +def csp( + cutout: Cutout, + installation: str | CSPConfig, + technology: Literal["parabolic trough", "solar tower"] | None = None, + **params: Any, +) -> DataArray | NumericArray: """ Convert downward shortwave direct radiation into a csp generation time- series. @@ -1286,29 +1438,30 @@ def csp(cutout, installation, technology=None, **params): ---------- cutout : atlite.Cutout The cutout to process. - installation: str or xr.DataArray - CSP installation details determining the solar field efficiency dependent on - the local solar position. Can be either the name of one of the standard - installations provided through `atlite.cspinstallationsPanel` or an - xarray.DataArray with 'azimuth' (in rad) and 'altitude' (in rad) coordinates - and an 'efficiency' (in p.u.) entry. - technology: str - Overwrite CSP technology from the installation configuration. The technology - affects which direct radiation is considered. Either 'parabolic trough' (DHI) - or 'solar tower' (DNI). + installation : str or xr.DataArray + CSP installation details determining the solar field efficiency + dependent on the local solar position. 
Can be a name accepted by + :py:func:`atlite.resource.get_cspinstallationconfig` or an + ``xr.DataArray`` with ``'azimuth'`` (rad) and ``'altitude'`` (rad) + coordinates and an ``'efficiency'`` (p.u.) entry. + technology : {"parabolic trough", "solar tower"} or None + Overwrite CSP technology from the installation configuration. + ``'parabolic trough'`` uses direct horizontal irradiance (DHI), + ``'solar tower'`` uses direct normal irradiance (DNI). **params Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- csp : xr.DataArray - Time-series or capacity factors based on additional general - conversion arguments. + CSP generation time-series in specific yield (kWh/kW_ref), clipped + to [0, 1]. See :py:func:`convert_and_aggregate` for details on + aggregation behaviour. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -1375,11 +1528,13 @@ def runoff( cutout : atlite.Cutout Cutout providing weather data with runoff variables. smooth : bool or int, optional - If True, apply a rolling mean with the default window of ``24 * 7`` - hours. If an integer, use it as the rolling window size. + If ``True``, apply a rolling mean with the default window of + ``24 * 7`` time steps. If an integer, use it as the rolling window + size. Default ``None`` (no smoothing). lower_threshold_quantile : bool or float, optional - If True, use the default quantile ``5e-3``. If a float, set values - below that quantile to zero. + If ``True``, use the default quantile ``5e-3``. If a float, set + values below that quantile to zero. Default ``None`` (no + thresholding). normalize_using_yearly : pd.Series, optional Annual country totals used to scale ``countries``-indexed results over overlapping full years. 
One factor per country is derived from the @@ -1531,22 +1686,24 @@ def convert_line_rating( psi : float Line azimuth in degrees clockwise from north. R : float - Conductor resistance in ohm per meter at temperature ``Ts``. + Conductor resistance in Ω/m at temperature *Ts*. D : float, default 0.028 Conductor diameter in meters. Ts : float, default 373 Maximum conductor surface temperature in Kelvin. epsilon : float, default 0.6 - Conductor emissivity. + Conductor emissivity (dimensionless). alpha : float, default 0.6 - Conductor absorptivity. + Conductor absorptivity (dimensionless). per_unit : bool, default False Unused compatibility parameter. Returns ------- xr.DataArray or numpy.ndarray - Maximum current per time step in ampere. + Maximum current per time step in ampere. When *ds* is an + ``xr.Dataset`` the result is aggregated across intersecting cells + via ``.min("spatial")``. """ Ta = ds["temperature"] Tfilm = (Ta + Ts) / 2 @@ -1639,17 +1796,28 @@ def line_rating( Whether to show a progress bar. dask_kwargs : dict, default {} Dict with keyword arguments passed to `dask.compute`. - params : keyword arguments as float/series - Arguments to tweak/modify the line rating calculations based on [1]. - Defaults are: - * D : 0.028 (conductor diameter) - * Ts : 373 (maximally allowed surface temperature) - * epsilon : 0.6 (conductor emissivity) - * alpha : 0.6 (conductor absorptivity) + D : float, default 0.028 + Conductor diameter in meters. + Ts : float, default 373 + Maximum allowed conductor surface temperature in Kelvin. + epsilon : float, default 0.6 + Conductor emissivity (dimensionless). + alpha : float, default 0.6 + Conductor absorptivity (dimensionless). + **params : Any + Additional keyword arguments passed to + :py:func:`convert_line_rating`. Returns ------- - Current thermal limit timeseries with dimensions time x lines in Ampere. + xr.DataArray + Thermal current limit time-series with dimensions + ``(time, lines)`` in ampere. 
+ + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. Example ------- diff --git a/atlite/data.py b/atlite/data.py index bae7f325..d982da29 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -29,7 +29,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable, Sequence - from atlite._types import DataArray, Dataset, PathLike + from atlite._types import DataArray, DataFormat, Dataset, PathLike from atlite.cutout import Cutout logger = logging.getLogger(__name__) @@ -39,7 +39,7 @@ def get_features( cutout: Cutout, module: str, features: Iterable[str], - data_format: str, + data_format: DataFormat, tmpdir: PathLike | None = None, monthly_requests: bool = False, concurrent_requests: bool = False, @@ -190,7 +190,7 @@ def cutout_prepare( cutout: Cutout, features: str | Sequence[str] | None = None, tmpdir: PathLike | None = None, - data_format: str = "grib", + data_format: DataFormat = "grib", overwrite: bool = False, compression: dict[str, Any] | None = None, show_progress: bool = False, diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 34e4be61..f8f4409e 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -72,6 +72,19 @@ def nullcontext(): def _add_height(ds: xr.Dataset) -> xr.Dataset: + """ + Convert geopotential to height and replace the 'z' variable. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing geopotential variable 'z'. + + Returns + ------- + xr.Dataset + Dataset with 'height' variable in meters, 'z' removed. + """ g0 = 9.80665 z = ds["z"] if "time" in z.coords: @@ -81,6 +94,24 @@ def _add_height(ds: xr.Dataset) -> xr.Dataset: def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dataset: + """ + Standardize coordinate names and clean up auxiliary variables. + + Renames longitude/latitude/valid_time to x/y/time, rounds spatial + coordinates, and drops 'expver'/'number' if present. 
+ + Parameters + ---------- + ds : xr.Dataset + Raw ERA5 dataset with original coordinate names. + add_lon_lat : bool, optional + Whether to add 'lon'/'lat' as coordinate aliases. Default True. + + Returns + ------- + xr.Dataset + Dataset with standardized coordinates. + """ ds = ds.rename({"longitude": "x", "latitude": "y", "valid_time": "time"}) ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) @@ -92,6 +123,22 @@ def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dat def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: + """ + Retrieve and compute wind speed variables from ERA5. + + Downloads u/v wind components at 10m and 100m, computes wind speed, + shear exponent, azimuth angle, and surface roughness. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: wnd100m, wnd_shear_exp, wnd_azimuth, roughness. + """ ds = retrieve_data( variable=[ "10m_u_component_of_wind", @@ -120,11 +167,41 @@ def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: + """ + Replace negative roughness values with 2e-4. + + Parameters + ---------- + ds : xr.Dataset + Wind dataset containing 'roughness' variable. + + Returns + ------- + xr.Dataset + Dataset with corrected roughness values. + """ ds["roughness"] = ds["roughness"].where(ds["roughness"] >= 0.0, 2e-4) return ds def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: + """ + Retrieve and compute solar radiation variables from ERA5. + + Downloads radiation components, converts from J/m² to W/m², computes + albedo, diffuse radiation, and solar position (altitude/azimuth). + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format.
+ + Returns + ------- + xr.Dataset + Dataset with variables: influx_toa, influx_direct, influx_diffuse, + albedo, solar_altitude, solar_azimuth. + """ ds = retrieve_data( variable=[ "surface_net_solar_radiation", @@ -162,12 +239,40 @@ def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: + """ + Clip negative radiation values to zero. + + Parameters + ---------- + ds : xr.Dataset + Influx dataset with influx_direct, influx_diffuse, influx_toa. + + Returns + ------- + xr.Dataset + Dataset with non-negative radiation values. + """ for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a].clip(min=0.0) return ds def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: + """ + Retrieve temperature variables from ERA5. + + Downloads 2m temperature, soil temperature (level 4), and 2m dewpoint. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: temperature, soil temperature, dewpoint temperature. + """ ds = retrieve_data( variable=[ "2m_temperature", @@ -186,6 +291,19 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: + """ + Retrieve runoff data from ERA5. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with 'runoff' variable. + """ ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) @@ -193,11 +311,37 @@ def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: + """ + Clip negative runoff values to zero. 
+ + Parameters + ---------- + ds : xr.Dataset + Runoff dataset containing 'runoff' variable. + + Returns + ------- + xr.Dataset + Dataset with non-negative runoff values. + """ ds["runoff"] = ds["runoff"].clip(min=0.0) return ds def get_data_height(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: + """ + Retrieve geopotential and convert to terrain height. + + Parameters + ---------- + retrieval_params : ERA5RetrievalParams + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with 'height' variable in meters. + """ ds = retrieve_data(variable="geopotential", **retrieval_params) ds = _rename_and_clean_coords(ds) @@ -205,6 +349,19 @@ def get_data_height(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: def _area(coords: dict[str, xr.DataArray]) -> list[float]: + """ + Extract CDS API bounding box from coordinates. + + Parameters + ---------- + coords : dict[str, xr.DataArray] + Coordinate arrays with 'x' (longitude) and 'y' (latitude). + + Returns + ------- + list[float] + Bounding box as [north, west, south, east]. + """ x0, x1 = coords["x"].min().item(), coords["x"].max().item() y0, y1 = coords["y"].min().item(), coords["y"].max().item() return [y1, x0, y0, x1] @@ -215,6 +372,27 @@ def retrieval_times( static: bool = False, monthly_requests: bool = False, ) -> dict[str, Any] | list[dict[str, Any]]: + """ + Generate time parameter chunks for CDS API requests. + + Splits the time coordinate into year-based (or year-month-based) chunks + suitable for the CDS API query format. + + Parameters + ---------- + coords : dict[str, xr.DataArray] + Coordinate arrays with 'time' dimension. + static : bool, optional + If True, return a single time point (for time-invariant fields). + monthly_requests : bool, optional + If True, split requests by month within each year. 
+ + Returns + ------- + dict[str, Any] or list[dict[str, Any]] + Single dict if static, otherwise list of dicts with + 'year', 'month', 'day', 'time' keys. + """ time = coords["time"].to_index() if static: return { @@ -248,6 +426,14 @@ def retrieval_times( def noisy_unlink(path: PathLike) -> None: + """ + Remove a file with debug logging, handling PermissionError gracefully. + + Parameters + ---------- + path : PathLike + Path to the file to delete. + """ logger.debug("Deleting file %s", path) try: Path(path).unlink() @@ -256,11 +442,40 @@ def noisy_unlink(path: PathLike) -> None: def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: + """ + Register a weak-reference callback to delete a temp file when the dataset + is garbage collected. + + Parameters + ---------- + ds : xr.Dataset + Dataset whose lifetime controls the temp file. + target : PathLike + Path to the temporary file to clean up. + """ logger.debug("Adding finalizer for %s", target) weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: + """ + Remap internal dimension names to ERA5/CDS dimension names in chunk specs. + + Translates atlite dimension names (time, x, y) to the corresponding + ERA5 names (valid_time, longitude, latitude). + + Parameters + ---------- + chunks : Any + Chunk specification. If not a dict, returned as-is. + **dim_mapping : str + Additional or override dimension name mappings. + + Returns + ------- + Any + Remapped chunk dict, or original value if not a dict. + """ dim_mapping = { "time": "valid_time", "x": "longitude", @@ -281,6 +496,26 @@ def open_with_grib_conventions( chunks: dict[str, int] | None = None, tmpdir: PathLike | None = None, ) -> xr.Dataset: + """ + Open a GRIB file using cfgrib with standardized coordinate conventions. + + Renames forecast/pressure/model dimensions and expands missing dimensions. + If ``tmpdir`` is None, registers a finalizer to delete the file on GC. 
+ + Parameters + ---------- + grib_file : PathLike + Path to the GRIB file. + chunks : dict[str, int] or None, optional + Dask chunk specification for lazy loading. + tmpdir : PathLike or None, optional + If set, the file is kept (managed externally). + + Returns + ------- + xr.Dataset + Opened dataset with standardized dimensions. + """ ds = xr.open_dataset( grib_file, engine="cfgrib", @@ -332,6 +567,30 @@ def retrieve_data( lock: SerializableLock | None = None, **updates: Any, ) -> xr.Dataset: + """ + Download ERA5 data from the CDS API and return as an xarray Dataset. + + Parameters + ---------- + product : str + CDS product name (e.g. 'reanalysis-era5-single-levels'). + chunks : dict[str, int] or None, optional + Dask chunk specification for lazy loading. + tmpdir : PathLike or None, optional + Directory for temporary download files. If None, files are + cleaned up via finalizer on GC. + lock : SerializableLock or None, optional + Lock for thread-safe file creation. + **updates : Any + Additional CDS API request parameters. Must include at least + 'variable', 'year', and 'month'. + + Returns + ------- + xr.Dataset + Downloaded ERA5 data. + + """ request: dict[str, Any] = { "product_type": ["reanalysis"], "download_format": "unarchived", @@ -383,6 +642,38 @@ def get_data( concurrent_requests: bool = False, **creation_parameters: Any, ) -> xr.Dataset: + """ + Main entry point for downloading ERA5 data for a given feature. + + Dispatches to feature-specific ``get_data_{feature}`` functions, + optionally applies ``sanitize_{feature}``, and concatenates time chunks. + + Parameters + ---------- + cutout : Cutout + Cutout object defining the spatial and temporal extent. + feature : str + Feature to retrieve (e.g. 'wind', 'influx', 'temperature', + 'runoff', 'height'). + tmpdir : PathLike + Directory for temporary download files. + lock : SerializableLock or None, optional + Lock for thread-safe file creation. 
+ data_format : {'grib', 'netcdf'}, optional + Download format. Default 'grib'. + monthly_requests : bool, optional + If True, split API requests by month. Default False. + concurrent_requests : bool, optional + If True, use dask.delayed for parallel downloads. Default False. + **creation_parameters : Any + Additional parameters; 'sanitize' (bool, default True) controls + whether post-processing is applied. + + Returns + ------- + xr.Dataset + ERA5 data for the requested feature, aligned to cutout coordinates. + """ coords = cutout.coords sanitize = creation_parameters.get("sanitize", True) diff --git a/atlite/gis.py b/atlite/gis.py index a4b0f69b..7d7a33f1 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -419,7 +419,7 @@ def shape_availability( ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. excluder : atlite.gis.ExclusionContainer Container of all meta data or objects which to exclude, i.e. @@ -429,7 +429,7 @@ def shape_availability( ------- masked : np.array Mask whith eligible raster cells indicated by 1 and excluded cells by 0. - transform : rasterion.Affine + transform : rasterio.Affine Affine transform of the mask. """ @@ -703,7 +703,7 @@ def compute_shape_availability( ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. dst_transform : rasterio.Affine Transform of the target raster. Define if the availability @@ -719,7 +719,7 @@ def compute_shape_availability( ------- masked : np.array Mask whith eligible raster cells indicated by 1 and excluded cells by 0.
- transform : rasterion.Affine + transform : rasterio.Affine Affine transform of the mask. Raises @@ -769,14 +769,14 @@ def plot_shape_availability( This function uses its own default values for ``rasterio.plot.show`` and ``geopandas.GeoSeries.plot``. Therefore eligible land is drawn in green - Note that this funtion will likely fail if another CRS than the one of the + Note that this function will likely fail if another CRS than the one of the ExclusionContainer is used in the axis (e.g. cartopy projections). Parameters ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. ax : matplotlib.axes.Axes, optional Axes to plot on. If None, a new figure is created. @@ -799,8 +799,8 @@ def plot_shape_availability( Returns ------- - _type_ - _description_ + matplotlib.axes.Axes + Axes with the plotted availability. """ if show_kwargs is None: diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index e28a5d87..d201b0eb 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -11,13 +11,23 @@ from dask.array import cos, fmax, fmin, radians, sin, sqrt if TYPE_CHECKING: - from atlite._types import DataArray, Dataset + from atlite._types import ( + ClearskyModel, + DataArray, + Dataset, + IrradiationType, + TrackingType, + TrigonModel, + ) logger = logging.getLogger(__name__) def DiffuseHorizontalIrrad( - ds: Dataset, solar_position: Dataset, clearsky_model: str | None, influx: DataArray + ds: Dataset, + solar_position: Dataset, + clearsky_model: ClearskyModel | None, + influx: DataArray, ) -> DataArray: """ Estimate diffuse horizontal irradiation from total horizontal irradiation. 
@@ -244,11 +254,11 @@ def TiltedIrradiation( ds: Dataset, solar_position: Dataset, surface_orientation: Dataset, - trigon_model: str, - clearsky_model: str | None, - tracking: int | str = 0, + trigon_model: TrigonModel, + clearsky_model: ClearskyModel | None, + tracking: TrackingType | int = 0, altitude_threshold: float = 1.0, - irradiation: str = "total", + irradiation: IrradiationType = "total", ) -> DataArray: """ Calculate the irradiation on a tilted surface. diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index 18b42a39..3b28eaf2 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -15,11 +15,11 @@ if TYPE_CHECKING: from collections.abc import Callable - from atlite._types import Dataset, NumericArray + from atlite._types import Dataset, NumericArray, OrientationName, TrackingType def get_orientation( - name: str | dict[str, Any], **params: Any + name: OrientationName | dict[str, Any], **params: Any ) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: """ Return an orientation factory by name. @@ -205,11 +205,28 @@ def SurfaceOrientation( orientation: Callable[ [NumericArray, NumericArray, Dataset], dict[str, NumericArray] ], - tracking: str | None = None, + tracking: TrackingType = None, ) -> Dataset: """ Compute cos(incidence) for slope and panel azimuth. + Parameters + ---------- + ds : xarray.Dataset + Weather dataset containing ``lon`` and ``lat`` coordinates in degrees. + solar_position : xarray.Dataset + Dataset with solar position variables ``altitude`` and ``azimuth`` + in radians. + orientation : callable + Function returning a dict with ``slope`` and ``azimuth`` (in radians) + given ``(lon, lat, solar_position)``. Typically produced by + :func:`get_orientation`. + tracking : {None, 'horizontal', 'tilted_horizontal', 'vertical', 'dual'}, optional + Tracking type. 
``None`` for fixed panels, ``'horizontal'`` for 1-axis + horizontal tracking, ``'tilted_horizontal'`` for 1-axis horizontal + tracking of a tilted panel, ``'vertical'`` for 1-axis vertical + tracking, or ``'dual'`` for 2-axis tracking. + Returns ------- xarray.Dataset diff --git a/atlite/resource.py b/atlite/resource.py index cfb835b7..7bfaca2e 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -59,6 +59,7 @@ class PanelConfig(TypedDict): class CSPConfig(TypedDict): efficiency: DataArray path: PathLike + technology: NotRequired[str] name: NotRequired[str] source: NotRequired[str] diff --git a/atlite/utils.py b/atlite/utils.py index c3d26d82..d9ef88d9 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -34,17 +34,25 @@ def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> Dataset: """ - Convert an old style cutout directory to new style netcdf file. + Convert an old-style cutout directory to a new-style netCDF file. + + Parameters + ---------- + old_cutout_dir : str or Path + Path to the legacy cutout directory containing ``meta.nc``. + path : str or Path + Output path for the migrated ``.nc`` file. Returns ------- - xarray.Dataset + xr.Dataset The migrated cutout data. Raises ------ MergeError If automatic migration of multi-file datasets fails. + """ old_cutout_dir = Path(old_cutout_dir) with xr.open_dataset(old_cutout_dir / "meta.nc") as meta: @@ -105,17 +113,30 @@ def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> D def timeindex_from_slice(timeslice: Any) -> pd.DatetimeIndex: + """ + Create an hourly DatetimeIndex from a slice with start/end month strings. + + Parameters + ---------- + timeslice : slice + Slice with start and end as month strings (e.g. ``"2013-01"``). + + Returns + ------- + pd.DatetimeIndex + Hourly index spanning the given months. 
+ """ end = pd.Timestamp(timeslice.end) + pd.offsets.DateOffset(months=1) return pd.date_range(timeslice.start, end, freq="1h", closed="left") class arrowdict(dict[str, Any]): """ - A subclass of dict, which allows you to get items in the dict using the - attribute syntax! + Dict subclass enabling attribute-style access to items. """ def __getattr__(self, item: str) -> Any: + """Retrieve a dict value as an attribute, raising AttributeError on missing keys.""" # noqa: DOC201, DOC501 try: return self.__getitem__(item) except KeyError as e: @@ -124,6 +145,7 @@ def __getattr__(self, item: str) -> Any: _re_pattern = re.compile("[a-zA-Z_][a-zA-Z0-9_]*") def __dir__(self) -> list[str]: + """List keys that are valid Python identifiers for tab-completion.""" # noqa: DOC201 dict_keys: list[str] = [] for k in self.keys(): if isinstance(k, str): @@ -135,11 +157,9 @@ def __dir__(self) -> list[str]: class CachedAttribute: """ - Computes attribute value and caches it in the instance. + Descriptor that computes an attribute value once and caches it on the instance. - From the Python Cookbook (Denis Otkidach) This decorator allows you - to create a property which can be computed once and accessed many - times. Sort of like memoization. + Based on the Python Cookbook recipe by Denis Otkidach. """ method: Callable[[Any], Any] @@ -152,11 +172,24 @@ def __init__( name: str | None = None, doc: str | None = None, ) -> None: + """ + Initialize the cached attribute descriptor. + + Parameters + ---------- + method : callable + Method whose return value will be cached. + name : str, optional + Attribute name for caching. Defaults to ``method.__name__``. + doc : str, optional + Docstring override. Defaults to ``method.__doc__``. 
+ """ self.method = method self.name = name or method.__name__ self.__doc__ = doc or method.__doc__ def __get__(self, inst: Any, cls: type[Any] | None) -> Any: + """Compute on first access, cache the result, and return it.""" # noqa: DOC201 if inst is None: return self result = self.method(inst) From fd68cd3c9fbf276f126947acabe2bba16a272d70 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 26 Mar 2026 12:47:15 +0100 Subject: [PATCH 15/27] Enforce D103 (missing docstring in public function) via ruff Per-file ignores for datasets modules still lacking docstrings (cordex, ncep, sarah, gebco). --- atlite/datasets/era5.py | 2 +- pyproject.toml | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index f8f4409e..e612ace5 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -44,7 +44,7 @@ import contextlib @contextlib.contextmanager # type: ignore[no-redef] - def nullcontext(): + def nullcontext(): # noqa: D103 yield diff --git a/pyproject.toml b/pyproject.toml index dc815f69..a2d5618e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,7 @@ ignore = [ 'D100', # Missing docstring in public module 'D101', # Missing docstring in public class 'D102', # Missing docstring in public method - 'D103', # Missing docstring in public function + # 'D103', # Missing docstring in public function — enforced, per-file ignores below 'D107', # Missing docstring in __init__ 'D202', # No blank lines allowed after function docstring 'D203', # 1 blank line required before class docstring @@ -129,6 +129,13 @@ ignore = [ 'D415', # First line should end with a period, question mark, or exclamation point ] +[tool.ruff.lint.per-file-ignores] +# TODO: add docstrings to these modules +"atlite/datasets/cordex.py" = ["D103"] +"atlite/datasets/ncep.py" = ["D103"] +"atlite/datasets/sarah.py" = ["D103"] +"atlite/datasets/gebco.py" = ["D103"] + [tool.ruff.lint.pydocstyle] convention = "numpy" From 
2d0497cc8fb43891109cce55062883db286e2a8d Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 26 Mar 2026 12:54:26 +0100 Subject: [PATCH 16/27] Add docstrings to all dataset modules (sarah, ncep, cordex, gebco) and remove per-file D103 ignores --- atlite/datasets/cordex.py | 137 +++++++++++++++++ atlite/datasets/gebco.py | 38 +++++ atlite/datasets/ncep.py | 306 ++++++++++++++++++++++++++++++++++++++ atlite/datasets/sarah.py | 86 +++++++++++ pyproject.toml | 7 - 5 files changed, 567 insertions(+), 7 deletions(-) diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 7489f147..62cec5b8 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -40,6 +40,19 @@ def rename_and_clean_coords(ds: xr.Dataset) -> xr.Dataset: + """Rename rotated coordinates and drop auxiliary variables. + + Parameters + ---------- + ds : xr.Dataset + CORDEX dataset with rotated lon/lat coordinates. + + Returns + ------- + xr.Dataset + Dataset with ``rlon``/``rlat`` renamed to ``x``/``y`` and + ``bnds``, ``height``, ``rotated_pole`` removed if present. + """ ds = ds.rename({"rlon": "x", "rlat": "y"}) return ds.drop( (set(ds.coords) | set(ds.data_vars)) & {"bnds", "height", "rotated_pole"} @@ -55,6 +68,30 @@ def prepare_data_cordex( xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any], ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare time-varying CORDEX data, yielding per-month slices. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year to extract. + months : list of int + Months to extract. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the corresponding monthly dataset slice. 
+ """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -83,6 +120,30 @@ def prepare_static_data_cordex( xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any], ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare static (time-invariant) CORDEX data. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year key for the yielded tuples. + months : list of int + Months to yield entries for. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the static dataset (same for each month). + """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -101,6 +162,30 @@ def prepare_weather_types_cordex( xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any], ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare CORDEX weather type classification data. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year to extract. + months : list of int + Months to extract. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Unused, kept for interface consistency. + ys : slice or np.ndarray + Unused, kept for interface consistency. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the corresponding monthly dataset slice. 
+ """ with xr.open_dataset(fn) as ds: ds = ds.rename({oldname: newname}) for m in months: @@ -117,6 +202,32 @@ def prepare_meta_cordex( module: Any, model: str = "MPI-M-MPI-ESM-LR", ) -> xr.Dataset: + """Build metadata dataset for a CORDEX cutout including height. + + Parameters + ---------- + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + year : int + Reference year. + month : int + Reference month. + template : str + Glob template for locating NetCDF files. + height_config : dict + Configuration for height data retrieval. + module : Any + Dataset module reference. + model : str, optional + Climate model identifier. + + Returns + ------- + xr.Dataset + Coordinate metadata dataset with height variable. + """ fn = next(glob.iglob(template.format(year=year, model=model))) # noqa: PTH207 with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) @@ -149,6 +260,32 @@ def tasks_yearly_cordex( newname: str, meta_attrs: dict[str, Any], ) -> list[dict[str, Any]]: + """Create yearly preparation task dicts for CORDEX data retrieval. + + Parameters + ---------- + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + yearmonths : list of (int, int) + ``(year, month)`` pairs to process. + prepare_func : callable + Function to call for data preparation. + template : str + Glob template for locating NetCDF files. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + meta_attrs : dict + Cutout metadata attributes; must contain ``"model"`` key. + + Returns + ------- + list of dict + One task dict per year with keys needed by ``prepare_func``. 
+ """ model = meta_attrs["model"] if not isinstance(xs, slice): diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 9b32dc02..14e47ddc 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -29,6 +29,22 @@ def get_data_gebco_height( xs: xr.DataArray, ys: xr.DataArray, gebco_path: PathLike ) -> xr.DataArray: + """Read and resample GEBCO bathymetry/elevation to the target grid. + + Parameters + ---------- + xs : xr.DataArray + Target x (longitude) coordinates. + ys : xr.DataArray + Target y (latitude) coordinates. + gebco_path : PathLike + Path to the GEBCO GeoTIFF or NetCDF file. + + Returns + ------- + xr.DataArray + Height data resampled to the target grid with dimensions ``(y, x)``. + """ x, X = xs.data[[0, -1]] y, Y = ys.data[[0, -1]] @@ -60,6 +76,28 @@ def get_data( concurrent_requests: bool = False, **creation_parameters: Any, ) -> xr.Dataset: + """Retrieve GEBCO height data for a cutout. + + Parameters + ---------- + cutout : Any + Cutout instance providing target coordinates. + feature : str + Feature name (expected ``"height"``). + tmpdir : PathLike + Temporary directory (unused, kept for interface consistency). + monthly_requests : bool, optional + Unused, kept for interface consistency. + concurrent_requests : bool, optional + Unused, kept for interface consistency. + **creation_parameters : Any + Must include ``"gebco_path"`` pointing to the GEBCO data file. + + Returns + ------- + xr.Dataset + Dataset with height variable on the cutout grid. 
+ """ if "gebco_path" not in creation_parameters: logger.error('Argument "gebco_path" not defined') path = creation_parameters["gebco_path"] diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 2fdf52b2..339ccec8 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -34,6 +34,22 @@ def convert_lons_lats_ncep( ds: xr.Dataset, xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any] ) -> xr.Dataset: + """Select and rename NCEP longitude/latitude coordinates, handling wraparound. + + Parameters + ---------- + ds : xr.Dataset + Dataset with ``lon_0`` and ``lat_0`` coordinates. + xs : slice or np.ndarray + Longitude selection range or array. + ys : slice or np.ndarray + Latitude selection range or array. + + Returns + ------- + xr.Dataset + Dataset with coordinates renamed to ``x``/``y`` and ``lon``/``lat``. + """ if not isinstance(xs, slice): first, second, last = np.asarray(xs)[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) @@ -63,6 +79,20 @@ def convert_lons_lats_ncep( def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr.Dataset: + """Stack initial and forecast times into a single hourly time dimension. + + Parameters + ---------- + ds : xr.Dataset + Dataset with ``initial_time0_hours`` and ``forecast_time0`` dimensions. + drop_time_vars : bool, optional + Drop auxiliary time variables (default ``True``). + + Returns + ------- + xr.Dataset + Dataset with a unified ``time`` coordinate. + """ ds = ds.stack(time=("initial_time0_hours", "forecast_time0")).assign_coords( time=np.ravel( ds.coords["initial_time0_hours"] @@ -76,6 +106,19 @@ def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr. def convert_unaverage_ncep(ds: xr.Dataset) -> xr.Dataset: + """Convert running-average variables (``*_avg``) to instantaneous values. + + Parameters + ---------- + ds : xr.Dataset + Dataset with variables ending in ``_avg``. 
+ + Returns + ------- + xr.Dataset + Dataset with un-averaged variables replacing the originals. + """ + def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: coords = da.coords[dim] y = da * xr.DataArray( @@ -93,6 +136,19 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: + """Convert accumulated variables (``*_acc``) to per-timestep values. + + Parameters + ---------- + ds : xr.Dataset + Dataset with variables ending in ``_acc``. + + Returns + ------- + xr.Dataset + Dataset with de-accumulated variables replacing the originals. + """ + def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: return da - da.shift(**{dim: 1}).fillna(0.0) @@ -108,6 +164,24 @@ def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: def convert_clip_lower( ds: xr.Dataset, variable: str, a_min: float, value: float ) -> xr.Dataset: + """Replace values at or below a threshold with a fill value. + + Parameters + ---------- + ds : xr.Dataset + Input dataset. + variable : str + Name of the variable to clip. + a_min : float + Threshold; values <= ``a_min`` are replaced. + value : float + Replacement value. + + Returns + ------- + xr.Dataset + Dataset with clipped variable. + """ ds[variable] = ds[variable].where(ds[variable] > a_min).fillna(value) return ds @@ -119,6 +193,26 @@ def prepare_wnd10m_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare 10-m wind speed from NCEP U/V components. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. 
+ + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``wnd10m`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -136,6 +230,26 @@ def prepare_influx_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare downward shortwave radiation flux from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``influx`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) @@ -153,6 +267,26 @@ def prepare_outflux_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare upward shortwave radiation flux from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``outflux`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) @@ -170,6 +304,26 @@ def prepare_temperature_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare 2-m air temperature from NCEP data. 
+ + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``temperature`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -185,6 +339,26 @@ def prepare_soil_temperature_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare soil temperature from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``soil temperature`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -200,6 +374,26 @@ def prepare_runoff_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare surface runoff from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``runoff`` variable. 
+ """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.fillna(0.0) @@ -217,6 +411,26 @@ def prepare_height_ncep( yearmonths: list[tuple[int, int]], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare geopotential height from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs to yield the same height data for. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``height`` variable for each yearmonth. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"HGT_P0_L105_GGA0": "height"}) @@ -231,6 +445,26 @@ def prepare_roughness_ncep( ys: slice | np.ndarray[Any, Any], engine: str = engine, ) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare surface roughness from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``roughness`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"SFCR_P8_L1_GGA0": "roughness"}) @@ -248,6 +482,32 @@ def prepare_meta_ncep( module: Any, engine: str = engine, ) -> xr.Dataset: + """Prepare cutout metadata including coordinates and height. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + year : int + Reference year. 
+ month : int + Reference month. + template : str + Glob-able file path template with ``{year}`` and ``{month}`` placeholders. + height_config : dict + Configuration dict for height preparation (must include ``tasks_func``). + module : Any + Dataset module reference. + engine : str, optional + xarray backend engine. + + Returns + ------- + xr.Dataset + Metadata dataset with coordinates, time, and ``height``. + """ fn = next(glob.iglob(template.format(year=year, month=month))) # noqa: PTH207 with xr.open_dataset(fn, engine=engine) as ds: ds = ds.coords.to_dataset() @@ -279,6 +539,28 @@ def tasks_monthly_ncep( template: str, meta_attrs: dict[str, Any], ) -> list[dict[str, Any]]: + """Build per-month task dicts for NCEP data preparation. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs to create tasks for. + prepare_func : callable + Preparation function to invoke per task. + template : str + Glob-able file path template with ``{year}`` and ``{month}`` placeholders. + meta_attrs : dict + Metadata attributes (unused, kept for interface consistency). + + Returns + ------- + list of dict + Task dictionaries keyed for the preparation function. + """ return [ { "prepare_func": prepare_func, @@ -301,6 +583,30 @@ def tasks_height_ncep( meta_attrs: dict[str, Any], **extra_args: Any, ) -> list[dict[str, Any]]: + """Build a single task dict for NCEP height data preparation. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs passed through to the preparation function. + prepare_func : callable + Preparation function to invoke. + template : str + Glob-able file path to the height data. + meta_attrs : dict + Metadata attributes (unused, kept for interface consistency). 
+ **extra_args + Additional keyword arguments forwarded to the task dict. + + Returns + ------- + list of dict + Single-element list with the height task dictionary. + """ return [ dict( prepare_func=prepare_func, diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index d548eb5c..c1ec2c53 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -45,6 +45,23 @@ def get_filenames(sarah_dir: str | PathLike, coords: dict[str, Any]) -> pd.DataFrame: + """ + Build a DataFrame of SIS and SID file paths for the requested time range. + + Parameters + ---------- + sarah_dir : str or PathLike + Root directory containing SARAH NetCDF files. + coords : dict + Coordinate mapping with a ``"time"`` key whose index defines the + requested temporal range. + + Returns + ------- + pd.DataFrame + DataFrame with columns ``"sis"`` and ``"sid"`` indexed by date. + """ + def _filenames_starting_with(name: str) -> pd.Series[str]: pattern = str(Path(sarah_dir) / "**" / f"{name}*.nc") files = pd.Series([str(f) for f in Path(sarah_dir).rglob(f"{name}*.nc")]) @@ -82,6 +99,22 @@ def _filenames_starting_with(name: str) -> pd.Series[str]: def interpolate( ds: xr.Dataset | xr.DataArray, dim: str = "time" ) -> xr.Dataset | xr.DataArray: + """ + Linearly interpolate NaN values along a dimension. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Input data with potential NaN gaps. + dim : str, default "time" + Dimension along which to interpolate. + + Returns + ------- + xr.Dataset or xr.DataArray + Data with NaN values filled by linear interpolation. + """ + def _interpolate1d( y: np.ndarray[Any, np.dtype[np.floating[Any]]], ) -> np.ndarray[Any, np.dtype[np.floating[Any]]]: @@ -118,6 +151,21 @@ def _interpolate(a: Any) -> Any: def as_slice(bounds: slice | tuple[float, float], pad: bool = True) -> slice: + """ + Convert coordinate bounds to a slice, optionally with small padding. 
+ + Parameters + ---------- + bounds : slice or tuple of float + Existing slice or ``(start, stop)`` tuple. + pad : bool, default True + If *bounds* is a tuple, add ±0.01 padding. + + Returns + ------- + slice + Slice suitable for ``.sel()`` indexing. + """ if not isinstance(bounds, slice): bounds = bounds + (-0.01, 0.01) # type: ignore[assignment] bounds = slice(*bounds) @@ -125,6 +173,19 @@ def as_slice(bounds: slice | tuple[float, float], pad: bool = True) -> slice: def hourly_mean(ds: xr.Dataset) -> xr.Dataset: + """ + Compute hourly means from 30-minute data by averaging consecutive pairs. + + Parameters + ---------- + ds : xr.Dataset + Dataset with 30-minute temporal resolution. + + Returns + ------- + xr.Dataset + Dataset resampled to hourly resolution. + """ ds1 = ds.isel(time=slice(None, None, 2)) ds2 = ds.isel(time=slice(1, None, 2)) ds2 = ds2.assign_coords(time=ds2.indexes["time"] - pd.Timedelta(30, "m")) @@ -143,6 +204,31 @@ def get_data( monthly_requests: bool = False, **creation_parameters: Any, ) -> xr.Dataset: + """ + Retrieve and process SARAH solar irradiance data for a cutout. + + Parameters + ---------- + cutout : Cutout + Target cutout defining the spatial and temporal extent. + feature : str + Feature name (e.g. ``"influx"``). + tmpdir : PathLike + Temporary directory for intermediate files. + lock : lock-like, optional + Lock for thread-safe file access. + monthly_requests : bool, default False + Whether to split requests by month. + **creation_parameters + Must include ``sarah_dir``. Optional keys: ``parallel`` (bool), + ``sarah_interpolate`` (bool). + + Returns + ------- + xr.Dataset + Dataset with ``influx_direct``, ``influx_diffuse``, + ``solar_altitude``, and ``solar_azimuth`` variables. 
+ """ assert cutout.dt in ("30min", "30T", "h", "1h") coords = cutout.coords diff --git a/pyproject.toml b/pyproject.toml index a2d5618e..e491443c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,13 +129,6 @@ ignore = [ 'D415', # First line should end with a period, question mark, or exclamation point ] -[tool.ruff.lint.per-file-ignores] -# TODO: add docstrings to these modules -"atlite/datasets/cordex.py" = ["D103"] -"atlite/datasets/ncep.py" = ["D103"] -"atlite/datasets/sarah.py" = ["D103"] -"atlite/datasets/gebco.py" = ["D103"] - [tool.ruff.lint.pydocstyle] convention = "numpy" From 67f7366fc1e35e108e46db05c74f8994303924f2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Mar 2026 12:31:37 +0000 Subject: [PATCH 17/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- atlite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atlite/convert.py b/atlite/convert.py index 71e005cf..2384b84c 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -74,7 +74,7 @@ def _aggregate_time( def _aggregate_time(da: xr.DataArray, method: str | None) -> xr.DataArray: if method == "sum": return da.sum("time", keep_attrs=True) - elif method == "mean": + if method == "mean": return da.mean("time", keep_attrs=True) return da From aef7b64731dc3efb0e5117bcb722f8894f6601d2 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 26 Mar 2026 14:02:39 +0100 Subject: [PATCH 18/27] Ignore D103 in test/ and examples/, fix duplicate _aggregate_time definition --- atlite/convert.py | 8 -------- pyproject.toml | 4 ++++ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index 2384b84c..169d6d64 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -71,14 +71,6 @@ def _aggregate_time( return da -def _aggregate_time(da: xr.DataArray, method: str | None) -> xr.DataArray: - if method == "sum": - 
return da.sum("time", keep_attrs=True) - if method == "mean": - return da.mean("time", keep_attrs=True) - return da - - def convert_and_aggregate( cutout: Cutout, convert_func: Callable[..., Any], diff --git a/pyproject.toml b/pyproject.toml index e491443c..a158dc73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,6 +129,10 @@ ignore = [ 'D415', # First line should end with a period, question mark, or exclamation point ] +[tool.ruff.lint.per-file-ignores] +"test/**" = ["D103"] +"examples/**" = ["D103"] + [tool.ruff.lint.pydocstyle] convention = "numpy" From 182b93e63ccce6a642563887b90e1280743e1f30 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 26 Mar 2026 14:04:10 +0100 Subject: [PATCH 19/27] Remove commented-out D103 ignore line --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a158dc73..ac46963a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,6 @@ ignore = [ 'D100', # Missing docstring in public module 'D101', # Missing docstring in public class 'D102', # Missing docstring in public method - # 'D103', # Missing docstring in public function — enforced, per-file ignores below 'D107', # Missing docstring in __init__ 'D202', # No blank lines allowed after function docstring 'D203', # 1 blank line required before class docstring From 0c807ce97cfe67cb586a1420739b5feb3af37117 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 26 Mar 2026 15:11:25 +0100 Subject: [PATCH 20/27] Fix docstring lint errors: D205, D401, D100, D101, D105, DOC201 --- atlite/aggregate.py | 4 +- atlite/convert.py | 31 +++------ atlite/csp.py | 4 +- atlite/cutout.py | 78 +++++++--------------- atlite/data.py | 4 +- atlite/datasets/__init__.py | 4 +- atlite/datasets/cordex.py | 3 +- atlite/datasets/era5.py | 5 +- atlite/datasets/gebco.py | 4 +- atlite/datasets/ncep.py | 3 +- atlite/datasets/sarah.py | 5 +- atlite/gis.py | 29 ++++---- atlite/hydro.py | 4 +- atlite/pv/irradiation.py | 2 + atlite/pv/orientation.py | 5 +- 
atlite/pv/solar_panel_model.py | 5 +- atlite/pv/solar_position.py | 2 + atlite/resource.py | 16 +++-- atlite/utils.py | 8 +-- atlite/wind.py | 4 +- doc/conf.py | 3 +- examples/historic-comparison-germany.ipynb | 1 - pyproject.toml | 20 +----- test/conftest.py | 2 + test/test_aggregate_time.py | 2 + test/test_dynamic_line_rating.py | 8 +-- test/test_gis.py | 44 +++--------- test/test_preparation_and_conversion.py | 72 +++++--------------- 28 files changed, 119 insertions(+), 253 deletions(-) diff --git a/atlite/aggregate.py b/atlite/aggregate.py index 111127b9..2f6b90d2 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for aggregating results. -""" +"""Functions for aggregating results.""" from __future__ import annotations diff --git a/atlite/convert.py b/atlite/convert.py index 169d6d64..14dd99cd 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -All functions for converting weather data into energy system model data. -""" +"""All functions for converting weather data into energy system model data.""" from __future__ import annotations @@ -528,8 +526,7 @@ def coefficient_of_performance( **params: Any, ) -> DataArray | NumericArray: """ - Convert ambient or soil temperature to coefficient of performance (COP) of - air- or ground-sourced heat pumps. + Convert temperature to heat pump coefficient of performance (COP). The COP is modelled as a quadratic function of the temperature difference ``dT = sink_T - source_T``: ``COP = c0 + c1 * dT + c2 * dT**2``. @@ -644,8 +641,7 @@ def heat_demand( **params: Any, ) -> DataArray | NumericArray: """ - Convert outside temperature into daily heat demand using the degree-day - approximation. + Convert outside temperature into daily heat demand using degree-day approximation. 
The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` where ``T_daily_mean`` is the daily-averaged temperature. Output is in @@ -762,8 +758,7 @@ def cooling_demand( **params: Any, ) -> DataArray | NumericArray: """ - Convert outside temperature into daily cooling demand using the degree-day - approximation. + Convert outside temperature into daily cooling demand using degree-day approximation. The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` where ``T_daily_mean`` is the daily-averaged temperature. Output is in @@ -895,8 +890,7 @@ def solar_thermal( **params: Any, ) -> DataArray | NumericArray: """ - Convert downward short-wave radiation flux and outside temperature into - time series for solar thermal collectors. + Convert radiation and temperature into solar thermal collector time series. Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G``. Mathematical model and defaults for *c0*, *c1* based on [1]. @@ -1155,8 +1149,7 @@ def irradiation( **params: Any, ) -> DataArray | NumericArray: """ - Calculate the total, direct, diffuse, or ground irradiation on a tilted - surface. + Calculate irradiation on a tilted surface. Parameters ---------- @@ -1280,8 +1273,7 @@ def pv( **params: Any, ) -> DataArray | NumericArray: """ - Convert downward-shortwave, upward-shortwave radiation flux and ambient - temperature into a pv generation time-series. + Convert radiation and temperature into PV generation time series. Parameters ---------- @@ -1431,8 +1423,7 @@ def csp( **params: Any, ) -> DataArray | NumericArray: """ - Convert downward shortwave direct radiation into a csp generation time- - series. + Convert direct radiation into CSP generation time series. Parameters ---------- @@ -1520,8 +1511,7 @@ def runoff( **params, ): """ - Compute aggregated surface runoff output with optional smoothing, - thresholding, and normalization. + Compute aggregated surface runoff with optional smoothing and normalization. 
Parameters ---------- @@ -1609,8 +1599,7 @@ def hydro( **kwargs, ): """ - Compute inflow time-series for `plants` by aggregating over catchment - basins from `hydrobasins` + Compute inflow time series for plants by aggregating over catchment basins. Parameters ---------- diff --git a/atlite/csp.py b/atlite/csp.py index a590e06d..bfd678d1 100644 --- a/atlite/csp.py +++ b/atlite/csp.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for use in conjunction with csp data generation. -""" +"""Functions for use in conjunction with csp data generation.""" from __future__ import annotations diff --git a/atlite/cutout.py b/atlite/cutout.py index aaddbfb2..b48342ff 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Base class for atlite. -""" +"""Base class for atlite.""" # There is a binary incompatibility between the pip wheels of netCDF4 and # rasterio, which leads to the first one to work correctly while the second @@ -239,37 +237,27 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: @property def name(self) -> str: - """ - Name of the cutout. - """ + """Name of the cutout.""" return self.path.stem @property def module(self) -> str | list[str]: - """ - Data module of the cutout. - """ + """Data module of the cutout.""" return self.data.attrs.get("module") # type: ignore[no-any-return] @property def crs(self) -> CRS: - """ - Coordinate Reference System of the cutout. - """ + """Coordinate Reference System of the cutout.""" return CRS(datamodules[atleast_1d(self.module)[0]].crs) @property def available_features(self) -> pd.Index: - """ - List of available weather data features for the cutout. 
- """ + """List of available weather data features for the cutout.""" return available_features(self.module) @property def chunks(self) -> dict[str, int] | None: - """ - Chunking of the cutout data used by dask. - """ + """Chunking of the cutout data used by dask.""" chunks = { k.lstrip("chunksize_"): v for k, v in self.data.attrs.items() @@ -279,23 +267,17 @@ def chunks(self) -> dict[str, int] | None: @property def coords(self) -> xr.Coordinates: - """ - Geographic coordinates of the cutout. - """ + """Geographic coordinates of the cutout.""" return self.data.coords @property def shape(self) -> tuple[int, int]: - """ - Size of spatial dimensions (y, x) of the cutout data. - """ + """Size of spatial dimensions (y, x) of the cutout data.""" return len(self.coords["y"]), len(self.coords["x"]) @property def extent(self) -> NDArray: - """ - Total extent of the area covered by the cutout (x, X, y, Y). - """ + """Total extent of the area covered by the cutout (x, X, y, Y).""" xs, ys = self.coords["x"].values, self.coords["y"].values dx, dy = self.dx, self.dy return np.array([ @@ -307,16 +289,12 @@ def extent(self) -> NDArray: @property def bounds(self) -> NDArray: - """ - Total bounds of the area covered by the cutout (x, y, X, Y). - """ + """Total bounds of the area covered by the cutout (x, y, X, Y).""" return self.extent[[0, 2, 1, 3]] @property def transform(self) -> rio.Affine: - """ - Get the affine transform of the cutout. - """ + """Get the affine transform of the cutout.""" return rio.Affine( self.dx, 0, @@ -328,9 +306,7 @@ def transform(self) -> rio.Affine: @property def transform_r(self) -> rio.Affine: - """ - Get the affine transform of the cutout with reverse y-order. - """ + """Get the affine transform of the cutout with reverse y-order.""" return rio.Affine( self.dx, 0, @@ -342,41 +318,31 @@ def transform_r(self) -> rio.Affine: @property def dx(self) -> float: - """ - Spatial resolution on the x coordinates. 
- """ + """Spatial resolution on the x coordinates.""" x = self.coords["x"] return round((x[-1] - x[0]).item() / (x.size - 1), 8) # type: ignore[no-any-return] @property def dy(self) -> float: - """ - Spatial resolution on the y coordinates. - """ + """Spatial resolution on the y coordinates.""" y = self.coords["y"] return round((y[-1] - y[0]).item() / (y.size - 1), 8) # type: ignore[no-any-return] @property def dt(self) -> str | None: - """ - Time resolution of the cutout. - """ + """Time resolution of the cutout.""" return pd.infer_freq(self.coords["time"].to_index()) # type: ignore[no-any-return] @property def prepared(self) -> bool: - """ - Boolean indicating whether all available features are prepared. - """ + """Boolean indicating whether all available features are prepared.""" return self.prepared_features.sort_index().equals( # type: ignore[no-any-return] self.available_features.sort_index() ) @property def prepared_features(self) -> pd.Series[Any]: - """ - Get the list of prepared features in the cutout. - """ + """Get the list of prepared features in the cutout.""" index = [ (self.data[v].attrs["module"], self.data[v].attrs["feature"]) for v in self.data @@ -506,6 +472,13 @@ def to_file(self, fn: PathLike | None = None) -> None: self.data.to_netcdf(fn) def __repr__(self) -> str: + """Return string representation of the cutout. + + Returns + ------- + str + Human-readable summary of the cutout. + """ start = np.datetime_as_string(self.coords["time"].values[0], unit="D") end = np.datetime_as_string(self.coords["time"].values[-1], unit="D") return ( @@ -647,7 +620,7 @@ def uniform_density_layout( def equals(self, other: Any) -> bool: """ - It overrides xarray.Dataset.equals and ignores the path attribute in the comparison. + Compare equality with another cutout, ignoring the path attribute. 
Returns ------- @@ -693,7 +666,6 @@ def layout_from_capacity_list(self, data, col="Capacity"): >>> pv.plot() """ - x_grid = self.data.x.values y_grid = self.data.y.values diff --git a/atlite/data.py b/atlite/data.py index d982da29..9776b2f9 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Management of data retrieval and structure. -""" +"""Management of data retrieval and structure.""" from __future__ import annotations diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py index bb006105..7a1ba209 100644 --- a/atlite/datasets/__init__.py +++ b/atlite/datasets/__init__.py @@ -2,9 +2,7 @@ # # SPDX-License-Identifier: MIT -""" -atlite datasets. -""" +"""atlite datasets.""" from __future__ import annotations diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 62cec5b8..f7ecfe32 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: MIT """ -Module containing specific operations for creating cutouts from the CORDEX -dataset. +Module for creating cutouts from the CORDEX dataset. DEPRECATED ---------- diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index e612ace5..2f5cd7aa 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -443,8 +443,7 @@ def noisy_unlink(path: PathLike) -> None: def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: """ - Register a weak-reference callback to delete a temp file when the dataset - is garbage collected. + Register a weak-reference callback to delete a temp file on garbage collection. Parameters ---------- @@ -643,7 +642,7 @@ def get_data( **creation_parameters: Any, ) -> xr.Dataset: """ - Main entry point for downloading ERA5 data for a given feature. + Download ERA5 data for a given feature. 
Dispatches to feature-specific ``get_data_{feature}`` functions, optionally applies ``sanitize_{feature}``, and concatenates time chunks. diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 14e47ddc..4d51dedd 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -3,9 +3,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module for loading gebco data. -""" +"""Module for loading gebco data.""" from __future__ import annotations diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 339ccec8..87cdcea2 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: MIT """ -Module containing specific operations for creating cutouts from the NCEP -dataset. +Module for creating cutouts from the NCEP dataset. DEPRECATED ---------- diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index c1ec2c53..3ade934f 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -1,10 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module containing specific operations for creating cutouts from the SARAH2 -dataset. -""" +"""Module for creating cutouts from the SARAH2 dataset.""" from __future__ import annotations diff --git a/atlite/gis.py b/atlite/gis.py index 7d7a33f1..b22b7c36 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for Geographic Information System. -""" +"""Functions for Geographic Information System.""" from __future__ import annotations @@ -520,9 +518,7 @@ def shape_availability_reprojected( class ExclusionContainer: - """ - Container for exclusion objects and meta data. 
- """ + """Container for exclusion objects and meta data.""" def __init__(self, crs: CrsLike = 3035, res: float = 100) -> None: """ @@ -664,23 +660,26 @@ def open_files(self) -> None: @property def all_closed(self) -> bool: - """ - Check whether all files in the raster container are closed. - """ + """Check whether all files in the raster container are closed.""" return all(isinstance(d["raster"], (str | Path)) for d in self.rasters) and all( isinstance(d["geometry"], (str | Path)) for d in self.geometries ) @property def all_open(self) -> bool: - """ - Check whether all files in the raster container are open. - """ + """Check whether all files in the raster container are open.""" return all( isinstance(d["raster"], rio.DatasetReader) for d in self.rasters ) and all(isinstance(d["geometry"], gpd.GeoSeries) for d in self.geometries) def __repr__(self) -> str: + """Return string representation of the exclusion container. + + Returns + ------- + str + Human-readable summary of the exclusion container. + """ return ( f"Exclusion Container" f"\n registered rasters: {len(self.rasters)} " @@ -696,8 +695,7 @@ def compute_shape_availability( dst_shape: tuple[int, int] | None = None, ) -> tuple[NDArray, rio.Affine]: """ - Compute the eligible area in one or more geometries and optionally - reproject. + Compute the eligible area in one or more geometries. Parameters ---------- @@ -764,8 +762,7 @@ def plot_shape_availability( plot_kwargs: dict[str, Any] | None = None, ) -> Axes: """ - Plot the eligible area for one or more geometries and optionally - reproject. + Plot the eligible area for one or more geometries. This function uses its own default values for ``rasterio.plot.show`` and ``geopandas.GeoSeries.plot``. 
Therefore eligible land is drawn in green diff --git a/atlite/hydro.py b/atlite/hydro.py index 47e3c37e..e449d1cf 100644 --- a/atlite/hydro.py +++ b/atlite/hydro.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module involving hydro operations in atlite. -""" +"""Module involving hydro operations in atlite.""" from __future__ import annotations diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index d201b0eb..52941d39 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Solar irradiation decomposition and transposition models.""" + from __future__ import annotations import logging diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index 3b28eaf2..27e645d9 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Panel orientation and tilt angle utilities.""" + from __future__ import annotations import sys @@ -49,8 +51,7 @@ def make_latitude_optimal() -> Callable[ [NumericArray, NumericArray, Dataset], dict[str, xr.DataArray] ]: """ - Returns an optimal tilt angle for the given ``lat``, assuming that the - panel is facing towards the equator, using a simple method from [1]. + Return an optimal tilt angle assuming the panel faces the equator. This method only works for latitudes between 0 and 50. For higher latitudes, a static 40 degree angle is returned. 
diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index c16166f5..2fc02004 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Solar panel electrical performance models.""" + from __future__ import annotations from typing import TYPE_CHECKING, Any, Literal @@ -58,8 +60,7 @@ def _power_bofinger( irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] ) -> DataArray: """ - AC power per capacity predicted by bofinger model, based on W/m2 - irradiance. + Predict AC power per capacity using the Bofinger model. Maximum power point tracking is assumed. diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index c83002c7..30bf3813 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Solar position calculation utilities.""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/atlite/resource.py b/atlite/resource.py index 7bfaca2e..3177d9ed 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -1,10 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module for providing access to external ressources, like windturbine or pv -panel configurations. 
-""" +"""Module for accessing external resources like wind turbine and PV panel configurations.""" from __future__ import annotations @@ -39,6 +36,8 @@ from atlite._types import DataArray, NDArray, PathLike class TurbineConfig(TypedDict): + """Wind turbine configuration dictionary.""" + V: NDArray POW: NDArray P: float @@ -48,6 +47,8 @@ class TurbineConfig(TypedDict): source: NotRequired[str] class PanelConfig(TypedDict): + """Solar panel configuration dictionary.""" + model: NotRequired[Literal["huld", "bofinger"]] efficiency: NotRequired[float] A: NotRequired[float] @@ -57,6 +58,8 @@ class PanelConfig(TypedDict): source: NotRequired[str] class CSPConfig(TypedDict): + """CSP installation configuration dictionary.""" + efficiency: DataArray path: PathLike technology: NotRequired[str] @@ -167,8 +170,7 @@ def get_solarpanelconfig(panel: str | PathLike) -> PanelConfig: def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: """ - Load the 'installation'.yaml file from local disk to provide the system - efficiencies. + Load a CSP installation configuration from a YAML file. Parameters ---------- @@ -349,7 +351,7 @@ def _validate_turbine_config_dict( turbine: dict[str, Any], add_cutout_windspeed: bool ) -> TurbineConfig: """ - Checks the turbine config dict format and power curve. + Check the turbine config dict format and power curve. Parameters ---------- diff --git a/atlite/utils.py b/atlite/utils.py index d9ef88d9..f0ee9865 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -General utility functions for internal use. -""" +"""General utility functions for internal use.""" from __future__ import annotations @@ -131,9 +129,7 @@ def timeindex_from_slice(timeslice: Any) -> pd.DatetimeIndex: class arrowdict(dict[str, Any]): - """ - Dict subclass enabling attribute-style access to items. 
- """ + """Dict subclass enabling attribute-style access to items.""" def __getattr__(self, item: str) -> Any: """Retrieve a dict value as an attribute, raising AttributeError on missing keys.""" # noqa: DOC201, DOC501 diff --git a/atlite/wind.py b/atlite/wind.py index 47f7e154..c2e95e1d 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for use in conjunction with wind data generation. -""" +"""Functions for use in conjunction with wind data generation.""" from __future__ import annotations diff --git a/doc/conf.py b/doc/conf.py index 770e7ea5..15e6e9d2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Sphinx configuration for atlite documentation.""" + # # atlite documentation build configuration file, created by # sphinx-quickstart on Tue Jan 5 10:04:42 2016. @@ -15,7 +17,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
- from importlib.metadata import version as get_version # If extensions (or modules to document with autodoc) are in another directory, diff --git a/examples/historic-comparison-germany.ipynb b/examples/historic-comparison-germany.ipynb index 37ac7a5e..64dbe871 100644 --- a/examples/historic-comparison-germany.ipynb +++ b/examples/historic-comparison-germany.ipynb @@ -276,7 +276,6 @@ " Filtered capacities.\n", "\n", " \"\"\"\n", - "\n", " # Load locations of installed capacities and remove incomplete entries\n", " cols = OrderedDict((\n", " (\"installation_date\", 0),\n", diff --git a/pyproject.toml b/pyproject.toml index ac46963a..5bfa288b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,28 +108,10 @@ select = [ ignore = [ 'E501', # line too long 'E741', # ambiguous variable names - 'D105', # Missing docstring in magic method - 'D212', # Multi-line docstring summary should start at the second line - 'D200', # One-line docstring should fit on one line with quotes - 'D401', # First line should be in imperative mood - 'D404', # First word of the docstring should not be "This - 'D413', # Missing blank line after last section - - # # pydocstyle ignores, which could be enabled in future when existing - # # issues are fixed - 'D100', # Missing docstring in public module - 'D101', # Missing docstring in public class - 'D102', # Missing docstring in public method - 'D107', # Missing docstring in __init__ - 'D202', # No blank lines allowed after function docstring - 'D203', # 1 blank line required before class docstring - 'D205', # 1 blank line required between summary line and description - 'D400', # First line should end with a period - 'D415', # First line should end with a period, question mark, or exclamation point ] [tool.ruff.lint.per-file-ignores] -"test/**" = ["D103"] +"test/**" = ["D101", "D102", "D103", "D205"] "examples/**" = ["D103"] [tool.ruff.lint.pydocstyle] diff --git a/test/conftest.py b/test/conftest.py index 412c9eea..77235d36 100644 --- 
a/test/conftest.py +++ b/test/conftest.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Shared pytest fixtures for atlite tests.""" + import os from datetime import date from pathlib import Path diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 98b8ee3a..270fae91 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Tests for time aggregation functionality.""" + import numpy as np import pandas as pd import pytest diff --git a/test/test_dynamic_line_rating.py b/test/test_dynamic_line_rating.py index 58ea591d..3532a415 100644 --- a/test/test_dynamic_line_rating.py +++ b/test/test_dynamic_line_rating.py @@ -103,9 +103,7 @@ def test_suedkabel_sample_case(): def test_right_angle_in_different_configuration(): - """ - Test different configurations of angle difference of 90 degree. - """ + """Test different configurations of angle difference of 90 degree.""" ds = { "temperature": 313, "wnd100m": 0.61, @@ -149,9 +147,7 @@ def test_right_angle_in_different_configuration(): def test_angle_increase(): - """ - Test an increasing angle which should lead to an increasing capacity. - """ + """Test an increasing angle which should lead to an increasing capacity.""" ds = { "temperature": 313, "wnd100m": 0.61, diff --git a/test/test_gis.py b/test/test_gis.py index e05dc813..6d003f6b 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -142,9 +142,7 @@ def raster_codes(tmp_path_factory): def test_exclusioncontainer_repr(ref): - """ - Test ExclusionContainer.__repr__. - """ + """Test ExclusionContainer.__repr__.""" excluder = ExclusionContainer(ref.crs, res=0.01) assert "Exclusion Container" in repr(excluder) @@ -202,9 +200,7 @@ def test_open_closed_checks(ref, geometry, raster): def test_area(ref): - """ - Test the area of the cutout. 
- """ + """Test the area of the cutout.""" area = ref.area(crs=3035) assert isinstance(area, xr.DataArray) assert area.dims == ("y", "x") @@ -252,9 +248,7 @@ def test_bounds(ref): def test_regrid(): - """ - Test the atlite.gis.regrid function with average resampling. - """ + """Test the atlite.gis.regrid function with average resampling.""" # define blocks A = 0.25 B = 0.5 @@ -296,9 +290,7 @@ def test_regrid(): def test_pad_extent(): - """ - Test whether padding works with arrays of dimension > 2. - """ + """Test whether padding works with arrays of dimension > 2.""" src = np.ones((3, 2)) src_trans = rio.Affine(1, 0, 0, 0, 1, 0) dst_trans = rio.Affine(2, 0, 0, 0, 2, 0) @@ -382,9 +374,7 @@ def test_availability_matrix_flat_parallel_anonymous_function(ref, raster_codes) def test_availability_matrix_flat_wo_progressbar(ref): - """ - Same as `test_availability_matrix_flat` but without progressbar. - """ + """Same as `test_availability_matrix_flat` but without progressbar.""" shapes = gpd.GeoSeries( [box(X0 + 1, Y0 + 1, X1 - 1, Y1 - 1)], crs=ref.crs ).rename_axis("shape") @@ -413,9 +403,7 @@ def test_availability_matrix_flat_parallel_wo_progressbar(ref): def test_shape_availability_area(ref): - """ - Area of the mask and the shape must be close. - """ + """Area of the mask and the shape must be close.""" shapes = gpd.GeoSeries([box(X0 + 1, Y0 + 1, X1 - 1, Y1 - 1)], crs=ref.crs) res = 100 excluder = ExclusionContainer(res=res) @@ -484,9 +472,7 @@ def test_shape_availability_exclude_geometry(ref): def test_shape_availability_exclude_raster(ref, raster): - """ - When excluding the half of the geometry, the eligible area must be half. 
- """ + """When excluding the half of the geometry, the eligible area must be half.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 @@ -521,9 +507,7 @@ def test_shape_availability_exclude_raster(ref, raster): def test_shape_availability_excluder_partial_overlap(ref, raster): - """ - Test behavior, when a raster only overlaps half of the geometry. - """ + """Test behavior, when a raster only overlaps half of the geometry.""" bounds = X0 - 2, Y0, X0 + 2, Y1 area = abs((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) shapes = gpd.GeoSeries([box(*bounds)], crs=ref.crs) @@ -546,9 +530,7 @@ def test_shape_availability_excluder_partial_overlap(ref, raster): def test_shape_availability_excluder_raster_no_overlap(ref, raster): - """ - Check if the allow_no_overlap flag works. - """ + """Check if the allow_no_overlap flag works.""" bounds = X0 - 10.0, Y0 - 10.0, X0 - 2.0, Y0 - 2.0 area = abs((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) shapes = gpd.GeoSeries([box(*bounds)], crs=ref.crs) @@ -630,9 +612,7 @@ def test_availability_matrix_rastered_repro(ref, raster_reproject): def test_shape_availability_exclude_raster_codes(ref, raster_codes): - """ - Test exclusion of multiple raster codes. - """ + """Test exclusion of multiple raster codes.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 @@ -656,9 +636,7 @@ def test_shape_availability_exclude_raster_codes(ref, raster_codes): def test_plot_shape_availability(ref, raster): - """ - Test plotting of shape availability. - """ + """Test plotting of shape availability.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 diff --git a/test/test_preparation_and_conversion.py b/test/test_preparation_and_conversion.py index 3fe953e2..a9e698b7 100644 --- a/test/test_preparation_and_conversion.py +++ b/test/test_preparation_and_conversion.py @@ -37,16 +37,12 @@ def all_notnull_test(cutout): - """ - Test if no nan's in the prepared data occur. 
- """ + """Test if no nan's in the prepared data occur.""" assert cutout.data.notnull().all() def prepared_features_test(cutout): - """ - The prepared features series should contain all variables in cutout.data. - """ + """Verify that prepared features contain all variables in cutout.data.""" assert set(cutout.prepared_features) == set(cutout.data) @@ -256,27 +252,21 @@ def csp_test(cutout): def solar_thermal_test(cutout): - """ - Test the atlite.Cutout.solar_thermal function with different settings. - """ + """Test the atlite.Cutout.solar_thermal function with different settings.""" cap_factor = cutout.solar_thermal() assert cap_factor.notnull().all() assert cap_factor.sum() > 0 def heat_demand_test(cutout): - """ - Test the atlite.Cutout.heat_demand function with different settings. - """ + """Test the atlite.Cutout.heat_demand function with different settings.""" demand = cutout.heat_demand() assert demand.notnull().all() assert demand.sum() > 0 def soil_temperature_test(cutout): - """ - Test the atlite.Cutout.soil_temperature function with different settings. - """ + """Test the atlite.Cutout.soil_temperature function with different settings.""" demand = cutout.soil_temperature() assert demand.notnull().all() assert demand.sum() > 0 @@ -360,9 +350,7 @@ def runoff_test(cutout): def hydro_test(cutout): - """ - Test the atlite.Cutout.hydro function. - """ + """Test the atlite.Cutout.hydro function.""" plants = pd.DataFrame( cutout.grid.loc[[0], ["x", "y"]].values, columns=["lon", "lat"] ) @@ -388,9 +376,7 @@ def line_rating_test(cutout): def coefficient_of_performance_test(cutout): - """ - Test the coefficient_of_performance function. 
- """ + """Test the coefficient_of_performance function.""" cap_factor = cutout.coefficient_of_performance(source="air") assert cap_factor.notnull().all() assert cap_factor.sum() > 0 @@ -412,16 +398,12 @@ def test_data_module_arguments_era5(cutout_era5): @staticmethod def test_all_non_na_era5(cutout_era5): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5.data).all() @staticmethod def test_all_non_na_era5_coarse(cutout_era5_coarse): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5_coarse.data).all() @staticmethod @@ -430,24 +412,18 @@ def test_all_non_na_era5_coarse(cutout_era5_coarse): reason="This test breaks on windows machine on CI due to unknown reasons.", ) def test_all_non_na_era5_weird_resolution(cutout_era5_weird_resolution): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5_weird_resolution.data).all() @staticmethod def test_dx_dy_preservation_era5(cutout_era5): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose(np.diff(cutout_era5.data.x), 0.25) assert np.allclose(np.diff(cutout_era5.data.y), 0.25) @staticmethod def test_dx_dy_preservation_era5_coarse(cutout_era5_coarse): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose( np.diff(cutout_era5_coarse.data.x), cutout_era5_coarse.data.attrs["dx"] ) @@ -461,9 +437,7 @@ def test_dx_dy_preservation_era5_coarse(cutout_era5_coarse): reason="This test breaks on windows machine on CI due to unknown reasons.", ) def test_dx_dy_preservation_era5_weird_resolution(cutout_era5_weird_resolution): - """ - The coordinates should be the same after preparation. 
- """ + """The coordinates should be the same after preparation.""" assert np.allclose( np.diff(cutout_era5_weird_resolution.data.x), cutout_era5_weird_resolution.data.attrs["dx"], @@ -598,30 +572,22 @@ def test_line_rating_era5(cutout_era5): class TestSarah: @staticmethod def test_all_non_na_sarah(cutout_sarah): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah.data).all() @staticmethod def test_all_non_na_sarah_fine(cutout_sarah_fine): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah_fine.data).all() @staticmethod def test_all_non_na_sarah_weird_resolution(cutout_sarah_weird_resolution): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah_weird_resolution.data).all() @staticmethod def test_dx_dy_preservation_sarah(cutout_sarah): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose(np.diff(cutout_sarah.data.x), 0.25) assert np.allclose(np.diff(cutout_sarah.data.y), 0.25) @@ -652,7 +618,5 @@ def test_runoff_sarah(cutout_sarah): class TestGebco: @staticmethod def test_all_non_na_gebco(cutout_gebco): - """ - Every cells should have data. 
- """ + """Every cells should have data.""" assert np.isfinite(cutout_gebco.data).all() From 54601f8c5ed1fe5600dc15b9546640eeb478d57c Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 20 Apr 2026 10:46:23 +0200 Subject: [PATCH 21/27] address review comments --- .gitignore | 1 + atlite/_types.py | 17 ++------- atlite/aggregate.py | 8 ++--- atlite/convert.py | 62 ++++++++++++++++---------------- atlite/cutout.py | 29 +++++++-------- atlite/data.py | 10 +++--- atlite/datasets/__init__.py | 9 +---- atlite/datasets/cordex.py | 20 +++++------ atlite/datasets/era5.py | 18 ++++++++-- atlite/datasets/ncep.py | 18 +++++----- atlite/gis.py | 62 +++++++++++++++----------------- atlite/pv/irradiation.py | 66 ++++++++++++++++++---------------- atlite/pv/orientation.py | 32 ++++++++--------- atlite/pv/solar_panel_model.py | 18 +++++----- atlite/pv/solar_position.py | 6 +--- atlite/resource.py | 5 +-- atlite/wind.py | 16 +++++---- pyproject.toml | 5 +-- 18 files changed, 196 insertions(+), 206 deletions(-) diff --git a/.gitignore b/.gitignore index c67bdd8a..23e0ac9d 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ examples/*.nc examples/*.csv examples/*.zip examples/*.tif +benchmarks/ atlite/version.py paper .coverage* diff --git a/atlite/_types.py b/atlite/_types.py index 98bb6a00..f6933575 100644 --- a/atlite/_types.py +++ b/atlite/_types.py @@ -7,30 +7,19 @@ from pathlib import Path from typing import Any, Literal, TypeAlias, TypedDict -import geopandas as gpd import numpy as np -import scipy.sparse as sp import xarray as xr from pyproj import CRS -from shapely.geometry.base import BaseGeometry NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] NDArrayInt: TypeAlias = np.ndarray[Any, np.dtype[np.signedinteger[Any]]] NDArrayBool: TypeAlias = np.ndarray[Any, np.dtype[np.bool_]] -DataArray: TypeAlias = xr.DataArray -Dataset: TypeAlias = xr.Dataset PathLike: TypeAlias = str | Path -NumericArray: TypeAlias = NDArray | DataArray +NumericArray: TypeAlias = 
NDArray | xr.DataArray Number: TypeAlias = int | float | np.number[Any] -GeoDataFrame: TypeAlias = gpd.GeoDataFrame -GeoSeries: TypeAlias = gpd.GeoSeries -Geometry: TypeAlias = BaseGeometry -CrsLike: TypeAlias = str | int | CRS | dict[str, Any] | None -SparseMatrix: TypeAlias = sp.lil_matrix | sp.csr_matrix +CrsLike: TypeAlias = str | int | CRS | dict[str, Any] -TrackingType: TypeAlias = ( - Literal["horizontal", "tilted_horizontal", "vertical", "dual"] | None -) +TrackingType: TypeAlias = Literal["horizontal", "tilted_horizontal", "vertical", "dual"] ClearskyModel: TypeAlias = Literal["simple", "enhanced"] TrigonModel: TypeAlias = Literal["simple", "perez"] IrradiationType: TypeAlias = Literal["total", "direct", "diffuse", "ground"] diff --git a/atlite/aggregate.py b/atlite/aggregate.py index 2f6b90d2..e54ec633 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -14,14 +14,12 @@ import pandas as pd from scipy.sparse import spmatrix - from atlite._types import DataArray - def aggregate_matrix( - da: DataArray, + da: xr.DataArray, matrix: spmatrix, index: pd.Index, -) -> DataArray: +) -> xr.DataArray: """ Aggregate spatial data with a sparse matrix. 
@@ -53,6 +51,6 @@ def aggregate_matrix( output_dtypes=[da.dtype], dask_gufunc_kwargs={"output_sizes": {index.name: index.size}}, ).assign_coords(**{index.name: index}) - return cast("DataArray", result) + return cast("xr.DataArray", result) da = da.stack(spatial=("y", "x")).transpose("spatial", "time") return xr.DataArray(matrix * da, [index, da.coords["time"]]) diff --git a/atlite/convert.py b/atlite/convert.py index 14dd99cd..b0f927c8 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -46,8 +46,6 @@ from atlite._types import ( ClearskyModel, - DataArray, - Dataset, HeatPumpSource, IrradiationType, NumericArray, @@ -294,8 +292,8 @@ def convert_and_aggregate( def maybe_progressbar( - ds: Dataset | DataArray, show_progress: bool, **kwargs: Any -) -> Dataset | DataArray: + ds: xr.Dataset | xr.DataArray, show_progress: bool, **kwargs: Any +) -> xr.Dataset | xr.DataArray: """ Load a dataset or data array, optionally showing a dask progress bar. @@ -322,7 +320,7 @@ def maybe_progressbar( # temperature -def convert_temperature(ds: Dataset) -> DataArray: +def convert_temperature(ds: xr.Dataset) -> xr.DataArray: """ Convert ambient air temperature from Kelvin to degrees Celsius. @@ -340,7 +338,7 @@ def convert_temperature(ds: Dataset) -> DataArray: return ds["temperature"] - 273.15 -def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: +def temperature(cutout: Cutout, **params: Any) -> xr.DataArray | NumericArray: """ Return ambient air temperature converted from Kelvin to degrees Celsius. @@ -366,7 +364,7 @@ def temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: # soil temperature -def convert_soil_temperature(ds: Dataset) -> DataArray: +def convert_soil_temperature(ds: xr.Dataset) -> xr.DataArray: """ Convert soil temperature from Kelvin to degrees Celsius. 
@@ -388,7 +386,7 @@ def convert_soil_temperature(ds: Dataset) -> DataArray: return (ds["soil temperature"] - 273.15).fillna(0.0) -def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: +def soil_temperature(cutout: Cutout, **params: Any) -> xr.DataArray | NumericArray: """ Return soil temperature converted from Kelvin to degrees Celsius. @@ -417,7 +415,7 @@ def soil_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: # dewpoint temperature -def convert_dewpoint_temperature(ds: Dataset) -> DataArray: +def convert_dewpoint_temperature(ds: xr.Dataset) -> xr.DataArray: """ Convert dew point temperature from Kelvin to degrees Celsius. @@ -435,7 +433,7 @@ def convert_dewpoint_temperature(ds: Dataset) -> DataArray: return ds["dewpoint temperature"] - 273.15 -def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericArray: +def dewpoint_temperature(cutout: Cutout, **params: Any) -> xr.DataArray | NumericArray: """ Return dew point temperature converted from Kelvin to degrees Celsius. @@ -463,13 +461,13 @@ def dewpoint_temperature(cutout: Cutout, **params: Any) -> DataArray | NumericAr def convert_coefficient_of_performance( - ds: Dataset, + ds: xr.Dataset, source: HeatPumpSource, sink_T: float, c0: float | None, c1: float | None, c2: float | None, -) -> DataArray: +) -> xr.DataArray: """ Convert source temperatures to heat pump COP values. @@ -524,7 +522,7 @@ def coefficient_of_performance( c1: float | None = None, c2: float | None = None, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Convert temperature to heat pump coefficient of performance (COP). @@ -585,12 +583,12 @@ def coefficient_of_performance( # heat demand def convert_heat_demand( - ds: Dataset, + ds: xr.Dataset, threshold: float, a: float, constant: float, hour_shift: float, -) -> DataArray: +) -> xr.DataArray: """ Convert ambient temperature to daily heat demand by degree days. 
@@ -639,7 +637,7 @@ def heat_demand( constant: float = 0.0, hour_shift: float = 0.0, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Convert outside temperature into daily heat demand using degree-day approximation. @@ -702,12 +700,12 @@ def heat_demand( # cooling demand def convert_cooling_demand( - ds: Dataset, + ds: xr.Dataset, threshold: float, a: float, constant: float, hour_shift: float, -) -> DataArray: +) -> xr.DataArray: """ Convert ambient temperature to daily cooling demand by degree days. @@ -756,7 +754,7 @@ def cooling_demand( constant: float = 0.0, hour_shift: float = 0.0, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Convert outside temperature into daily cooling demand using degree-day approximation. @@ -821,14 +819,14 @@ def cooling_demand( # solar thermal collectors def convert_solar_thermal( - ds: Dataset, + ds: xr.Dataset, orientation: Callable, trigon_model: TrigonModel, clearsky_model: ClearskyModel | None, c0: float, c1: float, t_store: float, -) -> DataArray: +) -> xr.DataArray: """ Convert weather data to solar thermal collector output. @@ -888,7 +886,7 @@ def solar_thermal( c1: float = 3.0, t_store: float = 80.0, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Convert radiation and temperature into solar thermal collector time series. @@ -1097,13 +1095,13 @@ def wind( # irradiation def convert_irradiation( - ds: Dataset, + ds: xr.Dataset, orientation: Callable, - tracking: TrackingType = None, + tracking: TrackingType | None = None, irradiation: IrradiationType = "total", trigon_model: TrigonModel = "simple", clearsky_model: ClearskyModel | None = "simple", -) -> DataArray: +) -> xr.DataArray: """ Convert weather data to irradiation on a tilted surface. 
@@ -1144,10 +1142,10 @@ def irradiation( cutout: Cutout, orientation: OrientationName | dict[str, float] | Callable, irradiation: IrradiationType = "total", - tracking: TrackingType = None, + tracking: TrackingType | None = None, clearsky_model: ClearskyModel | None = None, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Calculate irradiation on a tilted surface. @@ -1221,13 +1219,13 @@ def irradiation( # solar PV def convert_pv( - ds: Dataset, + ds: xr.Dataset, panel: dict[str, Any], orientation: Callable, tracking: TrackingType, trigon_model: TrigonModel = "simple", clearsky_model: ClearskyModel | None = "simple", -) -> DataArray: +) -> xr.DataArray: """ Convert weather data to photovoltaic specific generation. @@ -1268,10 +1266,10 @@ def pv( cutout: Cutout, panel: str | PanelConfig, orientation: OrientationName | dict[str, float] | Callable, - tracking: TrackingType = None, + tracking: TrackingType | None = None, clearsky_model: ClearskyModel | None = None, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Convert radiation and temperature into PV generation time series. @@ -1421,7 +1419,7 @@ def csp( installation: str | CSPConfig, technology: Literal["parabolic trough", "solar tower"] | None = None, **params: Any, -) -> DataArray | NumericArray: +) -> xr.DataArray | NumericArray: """ Convert direct radiation into CSP generation time series. 
diff --git a/atlite/cutout.py b/atlite/cutout.py index b48342ff..d02fb9d6 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -32,19 +32,16 @@ if TYPE_CHECKING: from collections.abc import Sequence + import scipy.sparse as sp + from shapely.geometry.base import BaseGeometry + from atlite._types import ( CrsLike, - DataArray, - GeoDataFrame, - Geometry, NDArray, Number, PathLike, - SparseMatrix, ) - pass - from atlite.convert import ( coefficient_of_performance, convert_and_aggregate, @@ -351,7 +348,7 @@ def prepared_features(self) -> pd.Series[Any]: return pd.Series(list(self.data), index, dtype=object) @CachedAttribute - def grid(self) -> GeoDataFrame: + def grid(self) -> gpd.GeoDataFrame: """ Cutout grid with coordinates and geometries. @@ -387,7 +384,7 @@ def sel( ---------- path : str | path-like File where to store the sub-cutout. Defaults to a temporary file. - bounds : GeoSeries.bounds | DataFrame, optional + bounds : gpd.GeoSeries.bounds | DataFrame, optional The outer bounds of the cutout or as a DataFrame containing (min.long, min.lat, max.long, max.lat). buffer : float, optional @@ -504,8 +501,8 @@ def __repr__(self) -> str: ) def indicatormatrix( - self, shapes: Sequence[Geometry], shapes_crs: CrsLike = 4326 - ) -> SparseMatrix: + self, shapes: Sequence[BaseGeometry], shapes_crs: CrsLike = 4326 + ) -> sp.lil_matrix | sp.csr_matrix: """ Compute the indicatormatrix. @@ -534,8 +531,8 @@ def indicatormatrix( return compute_indicatormatrix(self.grid, shapes, self.crs, shapes_crs) def intersectionmatrix( - self, shapes: Sequence[Geometry], shapes_crs: CrsLike = 4326 - ) -> SparseMatrix: + self, shapes: Sequence[BaseGeometry], shapes_crs: CrsLike = 4326 + ) -> sp.lil_matrix | sp.csr_matrix: """ Compute the intersectionmatrix. 
@@ -559,7 +556,7 @@ def intersectionmatrix( """ return compute_intersectionmatrix(self.grid, shapes, self.crs, shapes_crs) - def area(self, crs: CrsLike = None) -> DataArray: + def area(self, crs: CrsLike | None = None) -> xr.DataArray: """ Get the area per grid cell as a DataArray with coords (x,y). @@ -584,7 +581,7 @@ def area(self, crs: CrsLike = None) -> DataArray: [self.coords["y"], self.coords["x"]], ) - def uniform_layout(self) -> DataArray: + def uniform_layout(self) -> xr.DataArray: """ Get a uniform capacity layout for all grid cells. @@ -596,8 +593,8 @@ def uniform_layout(self) -> DataArray: return xr.DataArray(1, [self.coords["y"], self.coords["x"]]) def uniform_density_layout( - self, capacity_density: Number, crs: CrsLike = None - ) -> DataArray: + self, capacity_density: Number, crs: CrsLike | None = None + ) -> xr.DataArray: """ Get a capacity layout from a uniform capacity density. diff --git a/atlite/data.py b/atlite/data.py index 9776b2f9..e5dfbe2b 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable, Sequence - from atlite._types import DataArray, DataFormat, Dataset, PathLike + from atlite._types import DataFormat, PathLike from atlite.cutout import Cutout logger = logging.getLogger(__name__) @@ -41,7 +41,7 @@ def get_features( tmpdir: PathLike | None = None, monthly_requests: bool = False, concurrent_requests: bool = False, -) -> Dataset: +) -> xr.Dataset: """ Load feature datasets for a cutout module. 
@@ -87,9 +87,9 @@ def get_features( datasets = dask_compute(*datasets) - ds: Dataset = xr.merge(datasets, compat="equals") + ds: xr.Dataset = xr.merge(datasets, compat="equals") for v in ds: - da: DataArray = ds[v] + da: xr.DataArray = ds[v] da.attrs["module"] = module fd: Iterable[tuple[str, Any]] = datamodules[module].features.items() da.attrs["feature"] = [k for k, l in fd if v in l].pop() @@ -294,7 +294,7 @@ def cutout_prepare( missing_features: np.ndarray[Any, np.dtype[Any]] = missing_vars.index.unique( "feature" ) - ds: Dataset = get_features( + ds: xr.Dataset = get_features( cutout, module, missing_features, diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py index 7a1ba209..69dd418d 100644 --- a/atlite/datasets/__init__.py +++ b/atlite/datasets/__init__.py @@ -4,13 +4,6 @@ """atlite datasets.""" -from __future__ import annotations - -from typing import TYPE_CHECKING - from atlite.datasets import era5, gebco, sarah -if TYPE_CHECKING: - from types import ModuleType - -modules: dict[str, ModuleType] = {"era5": era5, "sarah": sarah, "gebco": gebco} +modules = {"era5": era5, "sarah": sarah, "gebco": gebco} diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index f7ecfe32..9a594fe1 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -319,7 +319,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_data_cordex, "oldname": "rsds", "newname": "influx", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "influx", @@ -331,7 +331,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_data_cordex, "oldname": "rsus", "newname": "outflux", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "outflux", @@ -343,7 +343,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_data_cordex, "oldname": "tas", "newname": "temperature", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, 
"{model}", "temperature", @@ -355,7 +355,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_data_cordex, "oldname": "hurs", "newname": "humidity", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "humidity", @@ -367,7 +367,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_data_cordex, "oldname": "sfcWind", "newname": "wnd10m", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "wind", @@ -379,7 +379,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_static_data_cordex, "oldname": "rlst", "newname": "roughness", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "roughness", @@ -391,7 +391,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_data_cordex, "oldname": "mrro", "newname": "runoff", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "runoff", @@ -403,7 +403,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_static_data_cordex, "oldname": "orog", "newname": "height", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "altitude", @@ -415,7 +415,7 @@ def tasks_yearly_cordex( "prepare_func": prepare_weather_types_cordex, "oldname": "CWT", "newname": "CWT", - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "weather_types", @@ -432,7 +432,7 @@ def tasks_yearly_cordex( meta_data_config = { "prepare_func": prepare_meta_cordex, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.cordex_dir, "{model}", "temperature", diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 2f5cd7aa..1cc76ebd 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -84,6 +84,11 @@ def _add_height(ds: xr.Dataset) -> xr.Dataset: ------- xr.Dataset Dataset with 'height' variable in meters, 'z' removed. 
+ + References + ---------- + [1] ERA5: surface elevation and orography, retrieved: 10.02.2019 + https://confluence.ecmwf.int/display/CKB/ERA5%3A+surface+elevation+and+orography """ g0 = 9.80665 z = ds["z"] @@ -113,6 +118,7 @@ def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dat Dataset with standardized coordinates. """ ds = ds.rename({"longitude": "x", "latitude": "y", "valid_time": "time"}) + # round coords since cds coords are float32 which would lead to mismatches ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) ) @@ -159,6 +165,7 @@ def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: np.log(ds["wnd10m"] / ds["wnd100m"]) / np.log(10 / 100) ).assign_attrs(units="", long_name="wind shear exponent") + # span the whole circle: 0 is north, π/2 is east, -π is south, 3π/2 is west azimuth = arctan2(ds["u100"], ds["v100"]) ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) @@ -225,10 +232,15 @@ def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: ) ds = ds.drop_vars(["ssrd", "ssr"]) + # Convert from energy to power J m**-2 -> W m**-2 and clip negative fluxes for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a] / (60.0 * 60.0) ds[a].attrs["units"] = "W m**-2" + # ERA5 variables are mean values for previous hour, i.e. 13:01 to 14:00 + # are labelled as "14:00". Account by calculating the SolarPosition for the + # center of the interval for aggregation. + # See https://github.com/PyPSA/atlite/issues/158 with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) time_shift = pd.to_timedelta("-30 minutes") @@ -498,8 +510,9 @@ def open_with_grib_conventions( """ Open a GRIB file using cfgrib with standardized coordinate conventions. - Renames forecast/pressure/model dimensions and expands missing dimensions. - If ``tmpdir`` is None, registers a finalizer to delete the file on GC. 
+ Performs the same conversion as the CDS backend, but locally. + Based on the documentation at + https://confluence.ecmwf.int/display/CKB/GRIB+to+netCDF+conversion+on+new+CDS+and+ADS+systems Parameters ---------- @@ -621,6 +634,7 @@ def retrieve_data( logger.info("CDS: Downloading variables\n\t%s\n", varstr) result.download(target) + # Convert from grib to netcdf locally, same conversion as in CDS backend if request["data_format"] == "grib": ds = open_with_grib_conventions(target, chunks=chunks, tmpdir=tmpdir) else: diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 87cdcea2..5835a9ba 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -626,7 +626,7 @@ def tasks_height_ncep( "influx": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_influx_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/dswsfc.*.grb2", ), @@ -634,7 +634,7 @@ def tasks_height_ncep( "outflux": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_outflux_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/uswsfc.*.grb2", ), @@ -642,7 +642,7 @@ def tasks_height_ncep( "temperature": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_temperature_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2", ), @@ -650,7 +650,7 @@ def tasks_height_ncep( "soil temperature": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_soil_temperature_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/soilt1.*.grb2", ), @@ -658,7 +658,7 @@ def tasks_height_ncep( "wnd10m": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_wnd10m_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/wnd10m.*.grb2", ), @@ -666,7 +666,7 @@ def 
tasks_height_ncep( "runoff": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_runoff_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/runoff.*.grb2", ), @@ -674,7 +674,7 @@ def tasks_height_ncep( "roughness": { "tasks_func": tasks_monthly_ncep, "prepare_func": prepare_roughness_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/flxf.gdas.*.grb2", ), @@ -682,7 +682,7 @@ def tasks_height_ncep( "height": { "tasks_func": tasks_height_ncep, "prepare_func": prepare_height_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "height/cdas1.20130101.splgrbanl.grb2", ), @@ -697,7 +697,7 @@ def tasks_height_ncep( meta_data_config = { "prepare_func": prepare_meta_ncep, - "template": os.path.join( # noqa: PTH118 + "template": os.path.join( config.ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2", ), diff --git a/atlite/gis.py b/atlite/gis.py index b22b7c36..dd31fe1b 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -35,14 +35,10 @@ from collections.abc import Callable, Iterable, Sequence from matplotlib.axes import Axes + from shapely.geometry.base import BaseGeometry from atlite._types import ( CrsLike, - DataArray, - Dataset, - GeoDataFrame, - Geometry, - GeoSeries, NDArray, PathLike, ) @@ -58,7 +54,7 @@ def get_coords( dy: float = 0.25, dt: str = "h", **kwargs: Any, -) -> Dataset: +) -> xr.Dataset: """ Create cutout coordinates from slices and resolutions. 
@@ -94,7 +90,7 @@ def get_coords( }) ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.sel(x=x, y=y, time=time) - return cast("Dataset", ds) + return cast("xr.Dataset", ds) def spdiag(v: NDArray | Sequence[float]) -> sp.sparse.csr_matrix: @@ -117,10 +113,10 @@ def spdiag(v: NDArray | Sequence[float]) -> sp.sparse.csr_matrix: def reproject_shapes( - shapes: Iterable[Geometry] | pd.Series | dict[Any, Geometry], + shapes: Iterable[BaseGeometry] | pd.Series | dict[Any, BaseGeometry], crs1: CrsLike, crs2: CrsLike, -) -> Iterable[Geometry] | pd.Series | OrderedDict[Any, Geometry]: +) -> Iterable[BaseGeometry] | pd.Series | OrderedDict[Any, BaseGeometry]: """ Reproject a collection of geometries. @@ -140,7 +136,7 @@ def reproject_shapes( """ transformer = Transformer.from_crs(crs1, crs2, always_xy=True) - def _reproject_shape(shape: Geometry) -> Geometry: + def _reproject_shape(shape: BaseGeometry) -> BaseGeometry: return transform(transformer.transform, shape) if isinstance(shapes, pd.Series): @@ -151,8 +147,8 @@ def _reproject_shape(shape: Geometry) -> Geometry: def compute_indicatormatrix( - orig: GeoDataFrame | GeoSeries | Iterable[Geometry], - dest: GeoDataFrame | GeoSeries | Iterable[Geometry], + orig: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], + dest: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], orig_crs: CrsLike = 4326, dest_crs: CrsLike = 4326, ) -> sp.sparse.lil_matrix: @@ -212,8 +208,8 @@ def compute_indicatormatrix( def compute_intersectionmatrix( - orig: GeoDataFrame | GeoSeries | Iterable[Geometry], - dest: GeoDataFrame | GeoSeries | Iterable[Geometry], + orig: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], + dest: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], orig_crs: CrsLike = 4326, dest_crs: CrsLike = 4326, ) -> sp.sparse.lil_matrix: @@ -292,10 +288,10 @@ def padded_transform_and_shape( def projected_mask( raster: rio.DatasetReader, - geom: GeoSeries, + geom: gpd.GeoSeries, 
transform: rio.Affine | None = None, shape: tuple[int, int] | None = None, - crs: CrsLike = None, + crs: CrsLike | None = None, allow_no_overlap: bool = False, **kwargs: Any, ) -> tuple[NDArray, rio.Affine]: @@ -408,7 +404,7 @@ def pad_extent( def shape_availability( - geometry: GeoSeries, excluder: ExclusionContainer + geometry: gpd.GeoSeries, excluder: ExclusionContainer ) -> tuple[NDArray, rio.Affine]: """ Compute the eligible area in one or more geometries. @@ -475,7 +471,7 @@ def shape_availability( def shape_availability_reprojected( - geometry: GeoSeries, + geometry: gpd.GeoSeries, excluder: ExclusionContainer, dst_transform: rio.Affine, dst_crs: CrsLike, @@ -553,7 +549,7 @@ def add_raster( invert: bool = False, nodata: int = 255, allow_no_overlap: bool = False, - crs: CrsLike = None, + crs: CrsLike | None = None, ) -> None: """ Register a raster to the ExclusionContainer. @@ -598,7 +594,7 @@ def add_raster( def add_geometry( self, - geometry: PathLike | GeoDataFrame | GeoSeries, + geometry: PathLike | gpd.GeoDataFrame | gpd.GeoSeries, buffer: float = 0, invert: bool = False, ) -> None: @@ -689,9 +685,9 @@ def __repr__(self) -> str: def compute_shape_availability( self, - geometry: GeoDataFrame | GeoSeries, + geometry: gpd.GeoDataFrame | gpd.GeoSeries, dst_transform: rio.Affine | None = None, - dst_crs: CrsLike = None, + dst_crs: CrsLike | None = None, dst_shape: tuple[int, int] | None = None, ) -> tuple[NDArray, rio.Affine]: """ @@ -752,11 +748,11 @@ def compute_shape_availability( def plot_shape_availability( self, - geometry: GeoDataFrame | GeoSeries, + geometry: gpd.GeoDataFrame | gpd.GeoSeries, ax: Axes | None = None, set_title: bool = True, dst_transform: rio.Affine | None = None, - dst_crs: CrsLike = None, + dst_crs: CrsLike | None = None, dst_shape: tuple[int, int] | None = None, show_kwargs: dict[str, Any] | None = None, plot_kwargs: dict[str, Any] | None = None, @@ -830,7 +826,7 @@ def plot_shape_availability( return ax -_mp_shapes: GeoSeries 
+_mp_shapes: gpd.GeoSeries _mp_excluder: ExclusionContainer _mp_dst_transform: rio.Affine _mp_dst_crs: CrsLike @@ -838,7 +834,7 @@ def plot_shape_availability( def _init_process( - shapes_: GeoSeries, + shapes_: gpd.GeoSeries, excluder_: ExclusionContainer, dst_transform_: rio.Affine, dst_crs_: CrsLike, @@ -862,11 +858,11 @@ def _process_func(i: Any) -> NDArray: def compute_availabilitymatrix( cutout: Any, - shapes: GeoDataFrame | GeoSeries, + shapes: gpd.GeoDataFrame | gpd.GeoSeries, excluder: ExclusionContainer, nprocesses: int | None = None, disable_progressbar: bool = True, -) -> DataArray: +) -> xr.DataArray: """ Compute the eligible share within cutout cells in the overlap with shapes. @@ -955,8 +951,8 @@ def compute_availabilitymatrix( def maybe_swap_spatial_dims( - ds: Dataset | DataArray, namex: str = "x", namey: str = "y" -) -> Dataset | DataArray: + ds: xr.Dataset | xr.DataArray, namex: str = "x", namey: str = "y" +) -> xr.Dataset | xr.DataArray: """ Ensure spatial coordinates follow atlite's axis ordering. @@ -997,11 +993,11 @@ def _as_transform(x: pd.Index, y: pd.Index) -> rio.Affine: def regrid( - ds: Dataset | DataArray, + ds: xr.Dataset | xr.DataArray, dimx: pd.Index, dimy: pd.Index, **kwargs: Any, -) -> Dataset | DataArray: +) -> xr.Dataset | xr.DataArray: """ Reproject data to a new spatial grid. 
@@ -1058,7 +1054,7 @@ def _reproject(src: NDArray, **kwargs: Any) -> NDArray: assert len(dtypes) == 1, "regrid can only reproject datasets with homogeneous dtype" return cast( - "Dataset | DataArray", + "xr.Dataset | xr.DataArray", ( xr .apply_ufunc( diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 52941d39..98337b35 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -13,10 +13,10 @@ from dask.array import cos, fmax, fmin, radians, sin, sqrt if TYPE_CHECKING: + import xarray as xr + from atlite._types import ( ClearskyModel, - DataArray, - Dataset, IrradiationType, TrackingType, TrigonModel, @@ -26,14 +26,19 @@ def DiffuseHorizontalIrrad( - ds: Dataset, - solar_position: Dataset, + ds: xr.Dataset, + solar_position: xr.Dataset, clearsky_model: ClearskyModel | None, - influx: DataArray, -) -> DataArray: + influx: xr.DataArray, +) -> xr.DataArray: """ Estimate diffuse horizontal irradiation from total horizontal irradiation. + Clearsky model from Reindl 1990 to split downward radiation into direct + and diffuse contributions. Should switch to more up-to-date model, e.g. + Ridley et al. (2010) http://dx.doi.org/10.1016/j.renene.2009.07.018 , + Lauret et al. (2013): http://dx.doi.org/10.1016/j.renene.2012.01.049 + Parameters ---------- ds : xarray.Dataset @@ -65,7 +70,8 @@ def DiffuseHorizontalIrrad( "enhanced" if "temperature" in ds and "humidity" in ds else "simple" ) - k = influx / influx_toa + # Reindl 1990 clearsky model + k = influx / influx_toa # clearsky index if clearsky_model == "simple": fraction = ( @@ -103,14 +109,14 @@ def DiffuseHorizontalIrrad( def TiltedDiffuseIrrad( - ds: Dataset, - solar_position: Dataset, - surface_orientation: Dataset, - direct: DataArray, - diffuse: DataArray, -) -> DataArray: + ds: xr.Dataset, + solar_position: xr.Dataset, + surface_orientation: xr.Dataset, + direct: xr.DataArray, + diffuse: xr.DataArray, +) -> xr.DataArray: """ - Calculate diffuse irradiation on a tilted surface. 
+ Calculate diffuse irradiation on a tilted surface (Hay-Davies model). Parameters ---------- @@ -139,8 +145,8 @@ def TiltedDiffuseIrrad( influx = direct + diffuse with np.errstate(divide="ignore", invalid="ignore"): - f = sqrt(direct / influx).fillna(0.0) - A = direct / influx_toa + f = sqrt(direct / influx).fillna(0.0) # brightening factor + A = direct / influx_toa # anisotropy factor R_b = cosincidence / sinaltitude @@ -164,8 +170,8 @@ def TiltedDiffuseIrrad( def TiltedDirectIrrad( - solar_position: Dataset, surface_orientation: Dataset, direct: DataArray -) -> DataArray: + solar_position: xr.Dataset, surface_orientation: xr.Dataset, direct: xr.DataArray +) -> xr.DataArray: """ Calculate direct irradiation on a tilted surface. @@ -191,7 +197,7 @@ def TiltedDirectIrrad( return (R_b * direct).rename("direct tilted") -def _albedo(ds: Dataset, influx: DataArray) -> DataArray: +def _albedo(ds: xr.Dataset, influx: xr.DataArray) -> xr.DataArray: """ Retrieve or derive surface albedo from the dataset. @@ -223,11 +229,11 @@ def _albedo(ds: Dataset, influx: DataArray) -> DataArray: def TiltedGroundIrrad( - ds: Dataset, - solar_position: Dataset, - surface_orientation: Dataset, - influx: DataArray, -) -> DataArray: + ds: xr.Dataset, + solar_position: xr.Dataset, + surface_orientation: xr.Dataset, + influx: xr.DataArray, +) -> xr.DataArray: """ Calculate ground-reflected irradiation on a tilted surface. @@ -253,15 +259,15 @@ def TiltedGroundIrrad( def TiltedIrradiation( - ds: Dataset, - solar_position: Dataset, - surface_orientation: Dataset, + ds: xr.Dataset, + solar_position: xr.Dataset, + surface_orientation: xr.Dataset, trigon_model: TrigonModel, clearsky_model: ClearskyModel | None, - tracking: TrackingType | int = 0, + tracking: TrackingType | int | None = 0, altitude_threshold: float = 1.0, irradiation: IrradiationType = "total", -) -> DataArray: +) -> xr.DataArray: """ Calculate the irradiation on a tilted surface. 
@@ -311,7 +317,7 @@ def TiltedIrradiation( """ influx_toa = ds["influx_toa"] - def clip(influx: DataArray, influx_max: DataArray) -> DataArray: + def clip(influx: xr.DataArray, influx_max: xr.DataArray) -> xr.DataArray: return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) if "influx" in ds: diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index 27e645d9..c8fd146b 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -17,12 +17,12 @@ if TYPE_CHECKING: from collections.abc import Callable - from atlite._types import Dataset, NumericArray, OrientationName, TrackingType + from atlite._types import NumericArray, OrientationName, TrackingType def get_orientation( name: OrientationName | dict[str, Any], **params: Any -) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: +) -> Callable[[NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray]]: """ Return an orientation factory by name. @@ -41,14 +41,14 @@ def get_orientation( if isinstance(name, dict): params = name name = params.pop("name", "constant") - result: Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]] = ( - getattr(sys.modules[__name__], f"make_{name}")(**params) - ) + result: Callable[ + [NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray] + ] = getattr(sys.modules[__name__], f"make_{name}")(**params) return result def make_latitude_optimal() -> Callable[ - [NumericArray, NumericArray, Dataset], dict[str, xr.DataArray] + [NumericArray, NumericArray, xr.Dataset], dict[str, xr.DataArray] ]: """ Return an optimal tilt angle assuming the panel faces the equator. @@ -72,7 +72,7 @@ def make_latitude_optimal() -> Callable[ """ def latitude_optimal( - lon: NumericArray, lat: NumericArray, solar_position: Dataset + lon: NumericArray, lat: NumericArray, solar_position: xr.Dataset ) -> dict[str, xr.DataArray]: """ Build an orientation with latitude-dependent optimal tilt. 
@@ -113,7 +113,7 @@ def latitude_optimal( def make_constant( slope: float, azimuth: float -) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: +) -> Callable[[NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray]]: """ Create an orientation function with constant slope and azimuth. @@ -133,7 +133,7 @@ def make_constant( azimuth_rad = radians(azimuth) def constant( - lon: NumericArray, lat: NumericArray, solar_position: Dataset + lon: NumericArray, lat: NumericArray, solar_position: xr.Dataset ) -> dict[str, NumericArray]: """ Return the configured constant panel orientation. @@ -159,7 +159,7 @@ def constant( def make_latitude( azimuth: float = 180, -) -> Callable[[NumericArray, NumericArray, Dataset], dict[str, NumericArray]]: +) -> Callable[[NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray]]: """ Create an orientation function with slope equal to latitude. @@ -176,7 +176,7 @@ def make_latitude( azimuth_rad = radians(azimuth) def latitude( - lon: NumericArray, lat: NumericArray, solar_position: Dataset + lon: NumericArray, lat: NumericArray, solar_position: xr.Dataset ) -> dict[str, NumericArray]: """ Return an orientation with slope equal to latitude. @@ -201,13 +201,13 @@ def latitude( def SurfaceOrientation( - ds: Dataset, - solar_position: Dataset, + ds: xr.Dataset, + solar_position: xr.Dataset, orientation: Callable[ - [NumericArray, NumericArray, Dataset], dict[str, NumericArray] + [NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray] ], - tracking: TrackingType = None, -) -> Dataset: + tracking: TrackingType | None = None, +) -> xr.Dataset: """ Compute cos(incidence) for slope and panel azimuth. 
diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index 2fc02004..3d044a99 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -13,12 +13,10 @@ if TYPE_CHECKING: import xarray as xr - from atlite._types import DataArray - def _power_huld( - irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] -) -> DataArray: + irradiance: xr.DataArray, t_amb: xr.DataArray, pc: dict[str, Any] +) -> xr.DataArray: """ AC power per capacity predicted by Huld model, based on W/m2 irradiance. @@ -57,8 +55,8 @@ def _power_huld( def _power_bofinger( - irradiance: DataArray, t_amb: DataArray, pc: dict[str, Any] -) -> DataArray: + irradiance: xr.DataArray, t_amb: xr.DataArray, pc: dict[str, Any] +) -> xr.DataArray: """ Predict AC power per capacity using the Bofinger model. @@ -93,8 +91,8 @@ def _power_bofinger( def SolarPanelModel( - ds: xr.Dataset, irradiance: DataArray, pc: dict[str, Any] -) -> DataArray: + ds: xr.Dataset, irradiance: xr.DataArray, pc: dict[str, Any] +) -> xr.DataArray: """ Compute PV power output for the selected panel model. @@ -102,14 +100,14 @@ def SolarPanelModel( ---------- ds : xarray.Dataset Dataset containing ambient temperature. - irradiance : xarray.DataArray + irradiance : xarray.DataArray Plane-of-array irradiation. pc : dict Panel configuration including the model parameters. Returns ------- - xarray.DataArray + xarray.DataArray Specific PV power output.
Raises diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 30bf3813..da608807 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -6,7 +6,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING from warnings import warn import pandas as pd @@ -14,11 +13,8 @@ from dask.array import arccos, arcsin, arctan2, cos, radians, sin from numpy import pi -if TYPE_CHECKING: - from atlite._types import Dataset - -def SolarPosition(ds: Dataset, time_shift: str | pd.Timedelta = "0H") -> Dataset: +def SolarPosition(ds: xr.Dataset, time_shift: str | pd.Timedelta = "0H") -> xr.Dataset: """ Compute solar azimuth and altitude. diff --git a/atlite/resource.py b/atlite/resource.py index 3177d9ed..2af36de9 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -31,9 +31,10 @@ if TYPE_CHECKING: from typing import TypedDict + import xarray as xr from typing_extensions import NotRequired - from atlite._types import DataArray, NDArray, PathLike + from atlite._types import NDArray, PathLike class TurbineConfig(TypedDict): """Wind turbine configuration dictionary.""" @@ -60,7 +61,7 @@ class PanelConfig(TypedDict): class CSPConfig(TypedDict): """CSP installation configuration dictionary.""" - efficiency: DataArray + efficiency: xr.DataArray path: PathLike technology: NotRequired[str] name: NotRequired[str] diff --git a/atlite/wind.py b/atlite/wind.py index c2e95e1d..a7b28645 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -12,17 +12,19 @@ import numpy as np if TYPE_CHECKING: - from atlite._types import DataArray, Dataset, NDArray + import xarray as xr + + from atlite._types import NDArray logger = logging.getLogger(__name__) def extrapolate_wind_speed( - ds: Dataset, + ds: xr.Dataset, to_height: int | float, from_height: int | None = None, method: Literal["logarithmic", "power"] = "logarithmic", -) -> DataArray: +) -> xr.DataArray: """ Extrapolate the wind speed from a given height above ground to another. 
@@ -90,19 +92,19 @@ def extrapolate_wind_speed( if method == "logarithmic": try: - roughness: DataArray = ds["roughness"] + roughness: xr.DataArray = ds["roughness"] except KeyError: raise RuntimeError( "The logarithmic interpolation method requires surface roughness (roughness);\n" "make sure you choose a compatible dataset like ERA5" ) from None - wnd_spd: DataArray = ds[from_name] * ( + wnd_spd: xr.DataArray = ds[from_name] * ( np.log(to_height / roughness) / np.log(from_height / roughness) ) method_desc: str = "logarithmic method with roughness" elif method == "power": try: - wnd_shear_exp: DataArray = ds["wnd_shear_exp"] + wnd_shear_exp: xr.DataArray = ds["wnd_shear_exp"] except KeyError: raise RuntimeError( "The power law interpolation method requires a wind shear exponent (wnd_shear_exp);\n" @@ -123,4 +125,4 @@ def extrapolate_wind_speed( "units": "m s**-1", }) - return cast("DataArray", wnd_spd.rename(to_name)) + return cast("xr.DataArray", wnd_spd.rename(to_name)) diff --git a/pyproject.toml b/pyproject.toml index 5bfa288b..b93ac850 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,8 +106,9 @@ select = [ ] ignore = [ - 'E501', # line too long - 'E741', # ambiguous variable names + 'E501', # line too long + 'E741', # ambiguous variable names + 'PTH118', # os.path.join -> Path (keep os.path.join for template strings) ] [tool.ruff.lint.per-file-ignores] From ab6ea3a008c84ea75aa8f745804cbd8aceda121b Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 20 Apr 2026 11:04:54 +0200 Subject: [PATCH 22/27] Fail loudly in cordex/ncep when path env var is missing --- atlite/datasets/cordex.py | 208 ++++++++++++++------------------------ atlite/datasets/ncep.py | 137 +++++++++---------------- 2 files changed, 127 insertions(+), 218 deletions(-) diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 9a594fe1..9ce32cbf 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -309,136 +309,82 @@ def tasks_yearly_cordex( ] 
-weather_data_config: dict[str, dict[str, Any]] = {} -try: - from atlite import config # type: ignore[attr-defined] - - weather_data_config = { - "influx": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_data_cordex, - "oldname": "rsds", - "newname": "influx", - "template": os.path.join( - config.cordex_dir, - "{model}", - "influx", - "rsds_*_{year}*.nc", - ), - }, - "outflux": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_data_cordex, - "oldname": "rsus", - "newname": "outflux", - "template": os.path.join( - config.cordex_dir, - "{model}", - "outflux", - "rsus_*_{year}*.nc", - ), - }, - "temperature": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_data_cordex, - "oldname": "tas", - "newname": "temperature", - "template": os.path.join( - config.cordex_dir, - "{model}", - "temperature", - "tas_*_{year}*.nc", - ), - }, - "humidity": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_data_cordex, - "oldname": "hurs", - "newname": "humidity", - "template": os.path.join( - config.cordex_dir, - "{model}", - "humidity", - "hurs_*_{year}*.nc", - ), - }, - "wnd10m": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_data_cordex, - "oldname": "sfcWind", - "newname": "wnd10m", - "template": os.path.join( - config.cordex_dir, - "{model}", - "wind", - "sfcWind_*_{year}*.nc", - ), - }, - "roughness": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_static_data_cordex, - "oldname": "rlst", - "newname": "roughness", - "template": os.path.join( - config.cordex_dir, - "{model}", - "roughness", - "rlst_*.nc", - ), - }, - "runoff": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_data_cordex, - "oldname": "mrro", - "newname": "runoff", - "template": os.path.join( - config.cordex_dir, - "{model}", - "runoff", - "mrro_*_{year}*.nc", - ), - }, - "height": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_static_data_cordex, - "oldname": "orog", - "newname": 
"height", - "template": os.path.join( - config.cordex_dir, - "{model}", - "altitude", - "orog_*.nc", - ), - }, - "CWT": { - "tasks_func": tasks_yearly_cordex, - "prepare_func": prepare_weather_types_cordex, - "oldname": "CWT", - "newname": "CWT", - "template": os.path.join( - config.cordex_dir, - "{model}", - "weather_types", - "CWT_*_{year}*.nc", - ), - }, - } -except ImportError: - pass - -meta_data_config: dict[str, Any] = {} -try: - from atlite import config # type: ignore[attr-defined] - - meta_data_config = { - "prepare_func": prepare_meta_cordex, +cordex_dir = os.environ["ATLITE_CORDEX_DIR"] + +weather_data_config: dict[str, dict[str, Any]] = { + "influx": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsds", + "newname": "influx", + "template": os.path.join(cordex_dir, "{model}", "influx", "rsds_*_{year}*.nc"), + }, + "outflux": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsus", + "newname": "outflux", + "template": os.path.join(cordex_dir, "{model}", "outflux", "rsus_*_{year}*.nc"), + }, + "temperature": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "tas", + "newname": "temperature", "template": os.path.join( - config.cordex_dir, - "{model}", - "temperature", - "tas_*_{year}*.nc", + cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc" ), - "height_config": weather_data_config["height"], - } -except (ImportError, KeyError): - pass + }, + "humidity": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "hurs", + "newname": "humidity", + "template": os.path.join( + cordex_dir, "{model}", "humidity", "hurs_*_{year}*.nc" + ), + }, + "wnd10m": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "sfcWind", + "newname": "wnd10m", + "template": os.path.join(cordex_dir, "{model}", "wind", "sfcWind_*_{year}*.nc"), + }, + "roughness": { + "tasks_func": 
tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "rlst", + "newname": "roughness", + "template": os.path.join(cordex_dir, "{model}", "roughness", "rlst_*.nc"), + }, + "runoff": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "mrro", + "newname": "runoff", + "template": os.path.join(cordex_dir, "{model}", "runoff", "mrro_*_{year}*.nc"), + }, + "height": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "orog", + "newname": "height", + "template": os.path.join(cordex_dir, "{model}", "altitude", "orog_*.nc"), + }, + "CWT": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_weather_types_cordex, + "oldname": "CWT", + "newname": "CWT", + "template": os.path.join( + cordex_dir, "{model}", "weather_types", "CWT_*_{year}*.nc" + ), + }, +} + +meta_data_config: dict[str, Any] = { + "prepare_func": prepare_meta_cordex, + "template": os.path.join(cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc"), + "height_config": weather_data_config["height"], +} diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 5835a9ba..8fa73983 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -618,90 +618,53 @@ def tasks_height_ncep( ] -weather_data_config: dict[str, dict[str, Any]] = {} -try: - from atlite import config # type: ignore[attr-defined] - - weather_data_config = { - "influx": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_influx_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/dswsfc.*.grb2", - ), - }, - "outflux": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_outflux_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/uswsfc.*.grb2", - ), - }, - "temperature": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_temperature_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/tmp2m.*.grb2", - ), - 
}, - "soil temperature": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_soil_temperature_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/soilt1.*.grb2", - ), - }, - "wnd10m": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_wnd10m_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/wnd10m.*.grb2", - ), - }, - "runoff": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_runoff_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/runoff.*.grb2", - ), - }, - "roughness": { - "tasks_func": tasks_monthly_ncep, - "prepare_func": prepare_roughness_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/flxf.gdas.*.grb2", - ), - }, - "height": { - "tasks_func": tasks_height_ncep, - "prepare_func": prepare_height_ncep, - "template": os.path.join( - config.ncep_dir, - "height/cdas1.20130101.splgrbanl.grb2", - ), - }, - } -except ImportError: - pass - -meta_data_config: dict[str, Any] = {} -try: - from atlite import config # type: ignore[attr-defined] - - meta_data_config = { - "prepare_func": prepare_meta_ncep, - "template": os.path.join( - config.ncep_dir, - "{year}{month:0>2}/tmp2m.*.grb2", - ), - "height_config": weather_data_config["height"], - } -except (ImportError, KeyError): - pass +ncep_dir = os.environ["ATLITE_NCEP_DIR"] + +weather_data_config: dict[str, dict[str, Any]] = { + "influx": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_influx_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/dswsfc.*.grb2"), + }, + "outflux": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_outflux_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/uswsfc.*.grb2"), + }, + "temperature": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_temperature_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2"), + }, + "soil temperature": { + "tasks_func": tasks_monthly_ncep, 
+ "prepare_func": prepare_soil_temperature_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/soilt1.*.grb2"), + }, + "wnd10m": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_wnd10m_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/wnd10m.*.grb2"), + }, + "runoff": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_runoff_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/runoff.*.grb2"), + }, + "roughness": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_roughness_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/flxf.gdas.*.grb2"), + }, + "height": { + "tasks_func": tasks_height_ncep, + "prepare_func": prepare_height_ncep, + "template": os.path.join(ncep_dir, "height/cdas1.20130101.splgrbanl.grb2"), + }, +} + +meta_data_config: dict[str, Any] = { + "prepare_func": prepare_meta_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2"), + "height_config": weather_data_config["height"], +} From 161c81d49da96a548db0e6fdebc71e4b1a95e481 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 20 Apr 2026 16:05:04 +0200 Subject: [PATCH 23/27] Restore non-obvious comments, citations, and docstring context --- atlite/convert.py | 11 +++++++++++ atlite/csp.py | 5 +++-- atlite/datasets/era5.py | 7 ++++++- atlite/datasets/gebco.py | 2 +- atlite/datasets/ncep.py | 25 +++++++++++++++++++++++-- atlite/gis.py | 24 ++++++++++++++++++++---- atlite/pv/irradiation.py | 6 ++++++ atlite/pv/orientation.py | 15 +++++++++++++-- atlite/pv/solar_panel_model.py | 7 ++++++- atlite/pv/solar_position.py | 7 +++++++ atlite/resource.py | 12 +++++++++++- 11 files changed, 107 insertions(+), 14 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index b0f927c8..b6e5dd30 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -1666,6 +1666,17 @@ def convert_line_rating( """ Convert weather data to dynamic line rating time series. 
+ The formulation is based on: + [1] "IEEE Std 738™-2012 (Revision of IEEE Std 738-2006/Incorporates IEEE Std + 738-2012/Cor 1-2013), IEEE Standard for Calculating the Current-Temperature + Relationship of Bare Overhead Conductors," p. 72. + + Simplifications: + 1. Wind speed is taken at 100 m above ground, whereas transmission lines are + typically at 50-60 m. + 2. Solar heat influx is set proportional to solar short wave influx. + 3. Incidence angle of the solar heat influx is assumed to be 90°. + Parameters ---------- ds : xr.Dataset diff --git a/atlite/csp.py b/atlite/csp.py index bfd678d1..4c24a8f4 100644 --- a/atlite/csp.py +++ b/atlite/csp.py @@ -47,8 +47,9 @@ def calculate_dni( altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] are set to altitude_threshold to prevent numerical issues when dividing by - the sine of very low solar altitude. Default: 3.75 degrees corresponds to - approximately 15 minutes of solar movement at 60 deg maximum altitude. + the sine of very low solar altitude (sunset / dawn). Default: 3.75 degrees + corresponds to the solar altitude traversed by the sun within about 15 minutes + in a location with 60 deg maximum solar altitude and 10 h day time. Returns ------- diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 1cc76ebd..38f5987e 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -241,6 +241,7 @@ def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: # are labelled as "14:00". Account by calculating the SolarPosition for the # center of the interval for aggregation. 
# See https://github.com/PyPSA/atlite/issues/158 + # Suppress DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) time_shift = pd.to_timedelta("-30 minutes") @@ -582,6 +583,9 @@ def retrieve_data( """ Download ERA5 data from the CDS API and return as an xarray Dataset. + The ongoing and past requests can be tracked at + https://cds-beta.climate.copernicus.eu/requests?tab=all. + Parameters ---------- product : str @@ -673,7 +677,8 @@ def get_data( lock : SerializableLock or None, optional Lock for thread-safe file creation. data_format : {{'grib', 'netcdf'}}, optional - Download format. Default 'grib'. + Download format. Default 'grib'; ``grib`` is recommended over + ``netcdf`` because the CDSAPI limits request size for the latter. monthly_requests : bool, optional If True, split API requests by month. Default False. concurrent_requests : bool, optional diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 4d51dedd..017f2b05 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -57,7 +57,7 @@ def get_data_gebco_height( out_shape=(len(ys), len(xs)), resampling=Resampling.average, ) - gebco = gebco[::-1] + gebco = gebco[::-1] # change inversed y-axis tags = dataset.tags(bidx=1) tags = {k: to_numeric(v, errors="ignore") for k, v in tags.items()} diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 8fa73983..0e53644e 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -58,6 +58,7 @@ def convert_lons_lats_ncep( ds = ds.sel(lat_0=ys) + # Lons should go from -180. to +180. if len(ds.coords["lon_0"].sel(lon_0=slice(xs.start + 360.0, xs.stop + 360.0))): ds = xr.concat( [ds.sel(lon_0=slice(xs.start + 360.0, xs.stop + 360.0)), ds.sel(lon_0=xs)], @@ -105,7 +106,14 @@ def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr. 
def convert_unaverage_ncep(ds: xr.Dataset) -> xr.Dataset: - """Convert running-average variables (``*_avg``) to instantaneous values. + r"""Convert running-average variables (``*_avg``) to instantaneous values. + + The fields ending in ``_avg`` are averages over the forecast window which + have to be un-averaged by + + .. math:: + \tilde x_1 = x_1, \quad + \tilde x_i = i \cdot x_i - (i - 1) \cdot x_{i-1} \quad \forall i > 1. Parameters ---------- @@ -135,7 +143,17 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: - """Convert accumulated variables (``*_acc``) to per-timestep values. + r"""Convert accumulated variables (``*_acc``) to per-timestep values. + + The fields ending in ``_acc`` are accumulated over the forecast_time and + have to be un-accumulated by + + .. math:: + \tilde x_1 = x_1, \quad + \tilde x_i = x_i - x_{i-1} \quad \forall\, 1 < i \leq 6. + + Source: + http://rda.ucar.edu/datasets/ds094.1/#docs/FAQs_hrly_timeseries.html Parameters ---------- @@ -255,6 +273,7 @@ def prepare_influx_ncep( ds = convert_time_hourly_ncep(ds) ds = ds.rename({"DSWRF_P8_L1_GGA0": "influx"}) + # clipping random fluctuations around zero ds = convert_clip_lower(ds, "influx", a_min=0.1, value=0.0) yield yearmonth, ds @@ -292,6 +311,7 @@ def prepare_outflux_ncep( ds = convert_time_hourly_ncep(ds) ds = ds.rename({"USWRF_P8_L1_GGA0": "outflux"}) + # clipping random fluctuations around zero ds = convert_clip_lower(ds, "outflux", a_min=3.0, value=0.0) yield yearmonth, ds @@ -395,6 +415,7 @@ def prepare_runoff_ncep( """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) + # runoff has missing values: set nans to 0 ds = ds.fillna(0.0) ds = convert_unaccumulate_ncep(ds) ds = convert_time_hourly_ncep(ds) diff --git a/atlite/gis.py b/atlite/gis.py index dd31fe1b..9b282f83 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -359,7 +359,11 @@ def pad_extent( 
**kwargs: Any, ) -> tuple[NDArray, rio.Affine]: """ - Pad an array before reprojection. + Pad the extent of `src` by an equivalent of one cell of the target raster. + + This ensures that the array is large enough to not be treated as nodata in + all cells of the destination raster. If ``src.ndim > 2``, the function + expects the last two dimensions to be ``y, x``. Parameters ---------- @@ -478,7 +482,14 @@ def shape_availability_reprojected( dst_shape: tuple[int, int], ) -> tuple[NDArray, rio.Affine]: """ - Compute availability and reproject it to a target raster. + Compute and reproject the eligible area of one or more geometries. + + The function executes ``shape_availability`` and reprojects the calculated + mask onto a new raster defined by ``(dst_transform, dst_crs, dst_shape)``. + Before reprojecting, the function pads the mask such that all non-nodata + data points are projected in full cells of the target raster. This ensures + that all data within the mask are projected correctly (GDAL inherent + 'problem'). Parameters ---------- @@ -999,7 +1010,9 @@ def regrid( **kwargs: Any, ) -> xr.Dataset | xr.DataArray: """ - Reproject data to a new spatial grid. + Interpolate `ds` to a new spatial grid using rasterio's reproject. + + See also: https://mapbox.github.io/rasterio/topics/resampling.html Parameters ---------- @@ -1010,7 +1023,10 @@ def regrid( dimy : pandas.Index Target y coordinates. ``dimy.name`` must match the source y dimension. **kwargs - Keyword arguments passed to ``rasterio.warp.reproject``. + Keyword arguments passed to ``rasterio.warp.reproject``; of note: + ``resampling`` is one of + ``gis.Resampling.{average,cubic,bilinear,nearest}``; ``src_crs`` and + ``dst_crs`` define the source/target CRS (default: EPSG 4326, latlong). 
Returns ------- diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 98337b35..b20d7e88 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -163,6 +163,8 @@ def TiltedDiffuseIrrad( ): logger.warning("diffuse_t exhibits negative values above altitude threshold.") + # fixup: clip all negative values (unclear why it gets negative) + # note: REatlas does not do the fixup with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) @@ -318,6 +320,7 @@ def TiltedIrradiation( influx_toa = ds["influx_toa"] def clip(influx: xr.DataArray, influx_max: xr.DataArray) -> xr.DataArray: + # use .data in clip due to dask-xarray incompatibilities return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) if "influx" in ds: @@ -369,6 +372,9 @@ def clip(influx: xr.DataArray, influx_max: xr.DataArray) -> xr.DataArray: msg = f"Unknown irradiation type: {irradiation}" raise ValueError(msg) + # The solar_position algorithms have a high error for small solar altitude + # values, leading to big overall errors from the 1/sinaltitude factor. + # => Suppress irradiation below altitude_threshold. cap_alt = solar_position["altitude"] < radians(altitude_threshold) result = result.where(~(cap_alt | (direct + diffuse <= 0.01)), 0) result.attrs["units"] = "W m**-2" diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index c8fd146b..e5fd2674 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -26,6 +26,10 @@ def get_orientation( """ Return an orientation factory by name. + Conventions: + - ``slope`` is the angle between ground and panel. + - ``azimuth`` is the clockwise angle from North (i.e. azimuth=180 faces South). 
+ Parameters ---------- name : str or dict @@ -102,6 +106,7 @@ def latitude_optimal( ) + np.radians(0.31) slope[~below_50] = np.radians(40.0) + # South orientation for panels on northern hemisphere and vice versa azimuth = np.where(lat.values < 0, 0, pi) return { "slope": xr.DataArray(slope, coords=lat.coords), @@ -262,18 +267,21 @@ def SurfaceOrientation( surface_azimuth - sun_azimuth ) + cos(surface_slope) * sin(sun_altitude) - elif tracking == "horizontal": + elif tracking == "horizontal": # horizontal tracking with horizontal axis + # orientation_dict['azimuth'] refers here to the azimuth of the tracker axis axis_azimuth = orientation_dict["azimuth"] rotation = arctan( (cos(sun_altitude) / sin(sun_altitude)) * sin(sun_azimuth - axis_azimuth) ) surface_slope = abs(rotation) + # the 2nd part yields +/-1 and determines if the panel is facing east or west surface_azimuth = axis_azimuth + arcsin(sin(rotation) / sin(surface_slope)) cosincidence = cos(surface_slope) * sin(sun_altitude) + sin( surface_slope ) * cos(sun_altitude) * cos(sun_azimuth - surface_azimuth) - elif tracking == "tilted_horizontal": + elif tracking == "tilted_horizontal": # horizontal tracking with tilted axis + # orientation_dict['slope'] refers here to the tilt of the tracker axis axis_tilt = orientation_dict["slope"] rotation = arctan( @@ -324,6 +332,9 @@ def SurfaceOrientation( ) raise AssertionError(msg) + # fixup incidence angle: if the panel is badly oriented and the sun shines + # on the back of the panel (incidence > 90 deg), the irradiation would be + # negative instead of 0; this is prevented here. cosincidence = cosincidence.clip(min=0) return xr.Dataset({ diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index 3d044a99..312761e6 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -2,7 +2,12 @@ # # SPDX-License-Identifier: MIT -"""Solar panel electrical performance models.""" +"""Solar panel electrical performance models. 
+ +The Huld model was copied from gsee -- global solar energy estimator +by Stefan Pfenninger. +https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py +""" from __future__ import annotations diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index da608807..fd991780 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -43,6 +43,9 @@ def SolarPosition(ds: xr.Dataset, time_shift: str | pd.Timedelta = "0H") -> xr.D [2] Sproul, A. B., Derivation of the solar geometric relationships using vector analysis, Renewable Energy, 32(7), 1187–1205 (2007). [3] Kalogirou, Solar Energy Engineering (2009). + + More accurate algorithms would be + --------------------------------- [4] I. Reda and A. Andreas, Solar position algorithm for solar radiation applications. Solar Energy, vol. 76, no. 5, pp. 577-589, 2004. [5] I. Reda and A. Andreas, Corrigendum to Solar position algorithm for @@ -50,6 +53,10 @@ def SolarPosition(ds: xr.Dataset, time_shift: str | pd.Timedelta = "0H") -> xr.D [6] Blanc, P., & Wald, L., The SG2 algorithm for a fast and accurate computation of the position of the sun for multi-decadal time period, Solar Energy, 86(10), 3072–3083 (2012). + + The unfortunately quite computationally intensive SPA algorithm [4,5] has + been implemented using numba or plain numpy for a single location at + https://github.com/pvlib/pvlib-python/blob/master/pvlib/spa.py. """ # Act like a getter if these return variables are already in ds rvs = { diff --git a/atlite/resource.py b/atlite/resource.py index 2af36de9..201adbe4 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -199,10 +199,11 @@ def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: config = cast("dict[str, Any]", yaml.safe_load(f)) config["path"] = installation_path + # Convert efficiency dict to xr.DataArray and convert units: deg -> rad, % -> p.u. 
da = pd.DataFrame(config["efficiency"]).set_index(["altitude", "azimuth"]) - da = da.to_xarray()["value"] + # Solar altitude + azimuth expected in deg for readability; calculations use rad. da = da.rename({"azimuth": "azimuth [deg]", "altitude": "altitude [deg]"}) da = da.assign_coords({ "altitude": radians(da["altitude [deg]"]), @@ -212,6 +213,7 @@ def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: da = da.chunk("auto") + # Efficiency unit from % to p.u. da /= 1.0e2 config["efficiency"] = da @@ -245,6 +247,8 @@ def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> f if model == "huld": return cast("float", panel["efficiency"]) if model == "bofinger": + # one unit in the capacity layout is interpreted as one panel of a + # capacity (A + 1000 * B + log(1000) * C) * 1000 W/m^2 * (k / 1000) A, B, C = itemgetter("A", "B", "C")(panel) return cast("float", (A + B * 1000.0 + C * np.log(1000.0)) * 1e3) raise ValueError(f"Unknown panel model: {model}") @@ -315,12 +319,16 @@ def kernel(v_0: NDArray) -> NDArray: ) def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: + # interpolate kernel and power curve to the same, regular velocity grid velocities_reg = np.linspace(-50.0, 50.0, 1001) power_reg = np.interp(velocities_reg, velocities, power) kernel_reg = kernel(velocities_reg) + # the 0.1 downscaling is necessary because scipy expects velocity + # increments of 1., but here they are 0.1 convolution = 0.1 * fftconvolve(power_reg, kernel_reg, mode="same") + # sample down so power curve doesn't get too long velocities_new = np.linspace(0.0, 35.0, 72) power_new = eta * np.interp(velocities_new, velocities_reg, convolution) @@ -394,6 +402,8 @@ def _validate_turbine_config_dict( err_msg = "turbine wind speed and power arrays do not have equal length." raise ValueError(err_msg) + # Uses `>=` rather than `>` because many power curves have two entries for the + # same wind speed at the cut-in and cut-out speeds. 
if not np.all(np.diff(turbine["V"]) >= 0): err_msg = ( "wind speed 'V' in the turbine config dict is expected to be increasing, " From 4f57261fc43890ccdbb6ad84004be1e4f2f5ce16 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 21 Apr 2026 09:31:35 +0200 Subject: [PATCH 24/27] Fix mypy errors in convert, cutout, era5 and orientation Tighten return types, narrow unions via assert/cast, swap np.where for xr.where to preserve DataArray typing, and restructure local rebinds to avoid reassignment conflicts. --- atlite/convert.py | 49 ++++++++++++++++++++-------------------- atlite/cutout.py | 2 +- atlite/datasets/era5.py | 14 +++++++----- atlite/pv/orientation.py | 28 +++++++++++------------ 4 files changed, 47 insertions(+), 46 deletions(-) diff --git a/atlite/convert.py b/atlite/convert.py index b6e5dd30..d2985b35 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -8,7 +8,6 @@ import datetime as dt import logging import warnings -from collections import namedtuple from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any, Literal @@ -48,7 +47,6 @@ ClearskyModel, HeatPumpSource, IrradiationType, - NumericArray, OrientationName, TrackingType, TrigonModel, @@ -83,7 +81,7 @@ def convert_and_aggregate( show_progress: bool = False, dask_kwargs: dict[str, Any] | None = None, **convert_kwds: Any, -) -> Any: +) -> xr.DataArray | xr.Dataset: """ Convert and aggregate a weather-based renewable generation time-series. 
@@ -235,7 +233,7 @@ def convert_and_aggregate( ) if isinstance(matrix, xr.DataArray): - coords = matrix.indexes.get(matrix.dims[1]).to_frame(index=False) + coords = matrix.indexes[matrix.dims[1]].to_frame(index=False) if not np.array_equal(coords[["x", "y"]], cutout.grid[["x", "y"]]): raise ValueError( "Matrix spatial coordinates not aligned with cutout spatial " @@ -338,7 +336,7 @@ def convert_temperature(ds: xr.Dataset) -> xr.DataArray: return ds["temperature"] - 273.15 -def temperature(cutout: Cutout, **params: Any) -> xr.DataArray | NumericArray: +def temperature(cutout: Cutout, **params: Any) -> xr.Dataset | xr.DataArray: """ Return ambient air temperature converted from Kelvin to degrees Celsius. @@ -386,7 +384,7 @@ def convert_soil_temperature(ds: xr.Dataset) -> xr.DataArray: return (ds["soil temperature"] - 273.15).fillna(0.0) -def soil_temperature(cutout: Cutout, **params: Any) -> xr.DataArray | NumericArray: +def soil_temperature(cutout: Cutout, **params: Any) -> xr.Dataset | xr.DataArray: """ Return soil temperature converted from Kelvin to degrees Celsius. @@ -433,7 +431,7 @@ def convert_dewpoint_temperature(ds: xr.Dataset) -> xr.DataArray: return ds["dewpoint temperature"] - 273.15 -def dewpoint_temperature(cutout: Cutout, **params: Any) -> xr.DataArray | NumericArray: +def dewpoint_temperature(cutout: Cutout, **params: Any) -> xr.Dataset | xr.DataArray: """ Return dew point temperature converted from Kelvin to degrees Celsius. @@ -522,7 +520,7 @@ def coefficient_of_performance( c1: float | None = None, c2: float | None = None, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Convert temperature to heat pump coefficient of performance (COP). @@ -637,7 +635,7 @@ def heat_demand( constant: float = 0.0, hour_shift: float = 0.0, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Convert outside temperature into daily heat demand using degree-day approximation. 
@@ -754,7 +752,7 @@ def cooling_demand( constant: float = 0.0, hour_shift: float = 0.0, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Convert outside temperature into daily cooling demand using degree-day approximation. @@ -886,7 +884,7 @@ def solar_thermal( c1: float = 3.0, t_store: float = 80.0, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Convert radiation and temperature into solar thermal collector time series. @@ -960,7 +958,7 @@ def convert_wind( ds: xr.Dataset, turbine: TurbineConfig, interpolation_method: Literal["logarithmic", "power"], -) -> xr.DataArray: +) -> xr.Dataset | xr.DataArray: """ Convert wind speeds to turbine-specific generation. @@ -995,6 +993,7 @@ def apply_power_curve(da): output_dtypes=[wnd_hub.dtype], dask="parallelized", ) + assert isinstance(da, xr.DataArray) da.attrs["units"] = "MWh/MWp" return da.rename("specific generation") @@ -1007,7 +1006,7 @@ def wind( add_cutout_windspeed: bool = False, interpolation_method: Literal["logarithmic", "power"] = "logarithmic", **params: Any, -) -> xr.DataArray: +) -> xr.Dataset | xr.DataArray: """ Generate wind generation time-series. @@ -1145,7 +1144,7 @@ def irradiation( tracking: TrackingType | None = None, clearsky_model: ClearskyModel | None = None, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Calculate irradiation on a tilted surface. @@ -1269,7 +1268,7 @@ def pv( tracking: TrackingType | None = None, clearsky_model: ClearskyModel | None = None, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Convert radiation and temperature into PV generation time series. @@ -1419,7 +1418,7 @@ def csp( installation: str | CSPConfig, technology: Literal["parabolic trough", "solar tower"] | None = None, **params: Any, -) -> xr.DataArray | NumericArray: +) -> xr.Dataset | xr.DataArray: """ Convert direct radiation into CSP generation time series. 
@@ -1748,13 +1747,13 @@ def convert_line_rating( A = D * 1 # projected area of conductor in square meters if isinstance(ds, dict): - Position = namedtuple("Position", ["altitude", "azimuth"]) - solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) + altitude = ds["solar_altitude"] + azimuth = ds["solar_azimuth"] else: - solar_position = SolarPosition(ds) - Phi_s = arccos( - cos(solar_position.altitude) * cos((solar_position.azimuth) - radians(psi)) - ) + sp = SolarPosition(ds) + altitude = sp["altitude"] + azimuth = sp["azimuth"] + Phi_s = arccos(cos(altitude) * cos(azimuth - radians(psi))) qs = alpha * Q * A * sin(Phi_s) @@ -1897,8 +1896,8 @@ def get_azimuth(shape): res.append(dummy) if show_progress: with ProgressBar(minimum=2): - res = compute(res, **dask_kwargs) + (computed,) = compute(res, **dask_kwargs) else: - res = compute(res, **dask_kwargs) + (computed,) = compute(res, **dask_kwargs) - return xr.concat(*res, dim=df.index).assign_attrs(units="A") + return xr.concat(computed, dim=df.index).assign_attrs(units="A") diff --git a/atlite/cutout.py b/atlite/cutout.py index d02fb9d6..6d408695 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -317,7 +317,7 @@ def transform_r(self) -> rio.Affine: def dx(self) -> float: """Spatial resolution on the x coordinates.""" x = self.coords["x"] - return round((x[-1] - x[0]).item() / (x.size - 1), 8) # type: ignore[no-any-return] + return float(round((x[-1] - x[0]).item() / (x.size - 1), 8)) @property def dy(self) -> float: diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 38f5987e..21989980 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -16,7 +16,7 @@ import weakref from pathlib import Path from tempfile import mkstemp -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, cast import cdsapi import numpy as np @@ -31,6 +31,7 @@ if TYPE_CHECKING: from collections.abc import Callable + from contextlib import 
AbstractContextManager from dask.utils import SerializableLock @@ -122,7 +123,7 @@ def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dat ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) ) - ds = maybe_swap_spatial_dims(ds) + ds = cast("xr.Dataset", maybe_swap_spatial_dims(ds)) if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) return ds.drop_vars(["expver", "number"], errors="ignore") @@ -624,11 +625,10 @@ def retrieve_data( ) result = client.retrieve(product, request) - if lock is None: - lock = nullcontext() + cm: AbstractContextManager = nullcontext() if lock is None else lock suffix = f".{request['data_format']}" - with lock: + with cm: fd, target = mkstemp(suffix=suffix, dir=tmpdir) os.close(fd) @@ -734,4 +734,6 @@ def retrieve_once(time: dict[str, Any]) -> xr.Dataset: else: datasets = map(retrieve_once, time_chunks) - return xr.concat(datasets, dim="time").sel(time=coords["time"]) + result = xr.concat(datasets, dim="time").sel(time=coords["time"]) + assert isinstance(result, xr.Dataset) + return result diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index e5fd2674..ced88ab1 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -17,12 +17,12 @@ if TYPE_CHECKING: from collections.abc import Callable - from atlite._types import NumericArray, OrientationName, TrackingType + from atlite._types import OrientationName, TrackingType def get_orientation( name: OrientationName | dict[str, Any], **params: Any -) -> Callable[[NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray]]: +) -> Callable[[xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray]]: """ Return an orientation factory by name. 
@@ -46,13 +46,13 @@ def get_orientation( params = name name = params.pop("name", "constant") result: Callable[ - [NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray] + [xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray] ] = getattr(sys.modules[__name__], f"make_{name}")(**params) return result def make_latitude_optimal() -> Callable[ - [NumericArray, NumericArray, xr.Dataset], dict[str, xr.DataArray] + [xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray] ]: """ Return an optimal tilt angle assuming the panel faces the equator. @@ -76,7 +76,7 @@ def make_latitude_optimal() -> Callable[ """ def latitude_optimal( - lon: NumericArray, lat: NumericArray, solar_position: xr.Dataset + lon: xr.DataArray, lat: xr.DataArray, solar_position: xr.Dataset ) -> dict[str, xr.DataArray]: """ Build an orientation with latitude-dependent optimal tilt. @@ -118,7 +118,7 @@ def latitude_optimal( def make_constant( slope: float, azimuth: float -) -> Callable[[NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray]]: +) -> Callable[[xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray]]: """ Create an orientation function with constant slope and azimuth. @@ -138,8 +138,8 @@ def make_constant( azimuth_rad = radians(azimuth) def constant( - lon: NumericArray, lat: NumericArray, solar_position: xr.Dataset - ) -> dict[str, NumericArray]: + lon: xr.DataArray, lat: xr.DataArray, solar_position: xr.Dataset + ) -> dict[str, xr.DataArray]: """ Return the configured constant panel orientation. @@ -164,7 +164,7 @@ def constant( def make_latitude( azimuth: float = 180, -) -> Callable[[NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray]]: +) -> Callable[[xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray]]: """ Create an orientation function with slope equal to latitude. 
@@ -181,8 +181,8 @@ def make_latitude( azimuth_rad = radians(azimuth) def latitude( - lon: NumericArray, lat: NumericArray, solar_position: xr.Dataset - ) -> dict[str, NumericArray]: + lon: xr.DataArray, lat: xr.DataArray, solar_position: xr.Dataset + ) -> dict[str, xr.DataArray]: """ Return an orientation with slope equal to latitude. @@ -209,7 +209,7 @@ def SurfaceOrientation( ds: xr.Dataset, solar_position: xr.Dataset, orientation: Callable[ - [NumericArray, NumericArray, xr.Dataset], dict[str, NumericArray] + [xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray] ], tracking: TrackingType | None = None, ) -> xr.Dataset: @@ -295,10 +295,10 @@ def SurfaceOrientation( surface_slope = arccos(cos(rotation) * cos(axis_tilt)) azimuth_difference = sun_azimuth - surface_azimuth - azimuth_difference = np.where( + azimuth_difference = xr.where( azimuth_difference > pi, azimuth_difference - 2 * pi, azimuth_difference ) - azimuth_difference = np.where( + azimuth_difference = xr.where( azimuth_difference < -pi, 2 * pi + azimuth_difference, azimuth_difference ) rotation = np.where( From 42a9f688c22c7f524956673c274a861062163211 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 21 Apr 2026 14:01:14 +0200 Subject: [PATCH 25/27] Fix remaining mypy errors across atlite modules --- atlite/_types.py | 3 +++ atlite/convert.py | 25 +++++++++--------- atlite/cutout.py | 9 ++++--- atlite/datasets/cordex.py | 4 +-- atlite/datasets/era5.py | 3 ++- atlite/datasets/ncep.py | 4 +-- atlite/datasets/sarah.py | 15 ++++++++--- atlite/pv/irradiation.py | 13 +++++++--- atlite/pv/solar_panel_model.py | 6 +++-- atlite/pv/solar_position.py | 9 +++---- atlite/resource.py | 3 ++- pyproject.toml | 2 +- test/test_aggregate_time.py | 46 ++++++++++++++++++---------------- 13 files changed, 82 insertions(+), 60 deletions(-) diff --git a/atlite/_types.py b/atlite/_types.py index f6933575..8b8341e3 100644 --- a/atlite/_types.py +++ b/atlite/_types.py @@ -18,6 +18,9 @@ NumericArray: TypeAlias = 
NDArray | xr.DataArray Number: TypeAlias = int | float | np.number[Any] CrsLike: TypeAlias = str | int | CRS | dict[str, Any] +ConvertResult: TypeAlias = ( + xr.DataArray | xr.Dataset | tuple[xr.DataArray | xr.Dataset, xr.DataArray] +) TrackingType: TypeAlias = Literal["horizontal", "tilted_horizontal", "vertical", "dual"] ClearskyModel: TypeAlias = Literal["simple", "enhanced"] diff --git a/atlite/convert.py b/atlite/convert.py index d2985b35..ad43cacd 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -45,6 +45,7 @@ from atlite._types import ( ClearskyModel, + ConvertResult, HeatPumpSource, IrradiationType, OrientationName, @@ -81,7 +82,7 @@ def convert_and_aggregate( show_progress: bool = False, dask_kwargs: dict[str, Any] | None = None, **convert_kwds: Any, -) -> xr.DataArray | xr.Dataset: +) -> ConvertResult: """ Convert and aggregate a weather-based renewable generation time-series. @@ -336,7 +337,7 @@ def convert_temperature(ds: xr.Dataset) -> xr.DataArray: return ds["temperature"] - 273.15 -def temperature(cutout: Cutout, **params: Any) -> xr.Dataset | xr.DataArray: +def temperature(cutout: Cutout, **params: Any) -> ConvertResult: """ Return ambient air temperature converted from Kelvin to degrees Celsius. @@ -384,7 +385,7 @@ def convert_soil_temperature(ds: xr.Dataset) -> xr.DataArray: return (ds["soil temperature"] - 273.15).fillna(0.0) -def soil_temperature(cutout: Cutout, **params: Any) -> xr.Dataset | xr.DataArray: +def soil_temperature(cutout: Cutout, **params: Any) -> ConvertResult: """ Return soil temperature converted from Kelvin to degrees Celsius. @@ -431,7 +432,7 @@ def convert_dewpoint_temperature(ds: xr.Dataset) -> xr.DataArray: return ds["dewpoint temperature"] - 273.15 -def dewpoint_temperature(cutout: Cutout, **params: Any) -> xr.Dataset | xr.DataArray: +def dewpoint_temperature(cutout: Cutout, **params: Any) -> ConvertResult: """ Return dew point temperature converted from Kelvin to degrees Celsius. 
@@ -520,7 +521,7 @@ def coefficient_of_performance( c1: float | None = None, c2: float | None = None, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Convert temperature to heat pump coefficient of performance (COP). @@ -635,7 +636,7 @@ def heat_demand( constant: float = 0.0, hour_shift: float = 0.0, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Convert outside temperature into daily heat demand using degree-day approximation. @@ -752,7 +753,7 @@ def cooling_demand( constant: float = 0.0, hour_shift: float = 0.0, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Convert outside temperature into daily cooling demand using degree-day approximation. @@ -884,7 +885,7 @@ def solar_thermal( c1: float = 3.0, t_store: float = 80.0, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Convert radiation and temperature into solar thermal collector time series. @@ -1006,7 +1007,7 @@ def wind( add_cutout_windspeed: bool = False, interpolation_method: Literal["logarithmic", "power"] = "logarithmic", **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Generate wind generation time-series. @@ -1144,7 +1145,7 @@ def irradiation( tracking: TrackingType | None = None, clearsky_model: ClearskyModel | None = None, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Calculate irradiation on a tilted surface. @@ -1268,7 +1269,7 @@ def pv( tracking: TrackingType | None = None, clearsky_model: ClearskyModel | None = None, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Convert radiation and temperature into PV generation time series. @@ -1418,7 +1419,7 @@ def csp( installation: str | CSPConfig, technology: Literal["parabolic trough", "solar tower"] | None = None, **params: Any, -) -> xr.Dataset | xr.DataArray: +) -> ConvertResult: """ Convert direct radiation into CSP generation time series. 
diff --git a/atlite/cutout.py b/atlite/cutout.py index 6d408695..59cacb7d 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -17,7 +17,7 @@ import logging from pathlib import Path from tempfile import mktemp -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from warnings import warn import geopandas as gpd @@ -224,8 +224,9 @@ def __init__(self, path: PathLike, **cutoutparams: Any) -> None: } data = xr.Dataset(coords=coords, attrs=attrs) - # Check compatibility of CRS - modules = atleast_1d(data.attrs.get("module")) + module_attr = data.attrs.get("module") + assert module_attr is not None, "Cutout data missing 'module' attribute" + modules = atleast_1d(module_attr) crs = {CRS(datamodules[m].crs) for m in modules} assert len(crs) == 1, f"CRS of {module} not compatible" @@ -240,7 +241,7 @@ def name(self) -> str: @property def module(self) -> str | list[str]: """Data module of the cutout.""" - return self.data.attrs.get("module") # type: ignore[no-any-return] + return cast("str | list[str]", self.data.attrs["module"]) @property def crs(self) -> CRS: diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 9ce32cbf..9e26507f 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -288,10 +288,10 @@ def tasks_yearly_cordex( model = meta_attrs["model"] if not isinstance(xs, slice): - first, second, last = xs.values[[0, 1, -1]] + first, second, last = xs[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) if not isinstance(ys, slice): - first, second, last = ys.values[[0, 1, -1]] + first, second, last = ys[[0, 1, -1]] ys = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) return [ diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 21989980..2b349155 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -467,7 +467,8 @@ def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: Path to the temporary file to clean 
up. """ logger.debug("Adding finalizer for %s", target) - weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) + assert ds._close is not None + weakref.finalize(cast("Any", ds._close).__self__.ds, noisy_unlink, target) def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 0e53644e..fae176b0 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -131,7 +131,7 @@ def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: y = da * xr.DataArray( np.arange(1, len(coords) + 1), dims=[dim], coords={dim: coords} ) - return y - y.shift(**{dim: 1}).fillna(0.0) + return y - y.shift({dim: 1}).fillna(0.0) for k, da in ds.items(): assert isinstance(k, str) @@ -167,7 +167,7 @@ def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: """ def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: - return da - da.shift(**{dim: 1}).fillna(0.0) + return da - da.shift({dim: 1}).fillna(0.0) for k, da in ds.items(): assert isinstance(k, str) diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index 3ade934f..4c43ceee 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -9,7 +9,7 @@ import warnings from functools import partial from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd @@ -135,7 +135,7 @@ def _interpolate(a: Any) -> Any: dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "interpolate only supports datasets with homogeneous dtype" - return xr.apply_ufunc( + result: xr.Dataset | xr.DataArray = xr.apply_ufunc( _interpolate, ds, input_core_dims=[[dim]], @@ -145,6 +145,7 @@ def _interpolate(a: Any) -> Any: dask="allowed", keep_attrs=True, ) + return result def as_slice(bounds: slice | tuple[float, float], pad: bool = True) -> slice: @@ -245,13 +246,19 @@ def get_data( 
lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - ds = interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0) + ds = cast( + "xr.Dataset", + interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0), + ) if cutout.dt not in ["30min", "30T"]: ds = hourly_mean(ds) if (cutout.dx != dx) or (cutout.dy != dy): - ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) + ds = cast( + "xr.Dataset", + regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average), + ) dif_attrs = {"long_name": "Surface Diffuse Shortwave Flux", "units": "W m-2"} ds["influx_diffuse"] = (ds["SIS"] - ds["SID"]).assign_attrs(**dif_attrs) diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index b20d7e88..3cf9f526 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -105,7 +105,8 @@ def DiffuseHorizontalIrrad( else: raise KeyError("`clearsky model` must be chosen from 'simple' and 'enhanced'") - return (influx * fraction).rename("diffuse horizontal") + result: xr.DataArray = (influx * fraction).rename("diffuse horizontal") + return result def TiltedDiffuseIrrad( @@ -168,7 +169,8 @@ def TiltedDiffuseIrrad( with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) - return diffuse_t.rename("diffuse tilted") + result: xr.DataArray = diffuse_t.rename("diffuse tilted") + return result def TiltedDirectIrrad( @@ -196,7 +198,8 @@ def TiltedDirectIrrad( R_b = cosincidence / sinaltitude - return (R_b * direct).rename("direct tilted") + result: xr.DataArray = (R_b * direct).rename("direct tilted") + return result def _albedo(ds: xr.Dataset, influx: xr.DataArray) -> xr.DataArray: @@ -257,7 +260,8 @@ def TiltedGroundIrrad( """ surface_slope = surface_orientation["slope"] ground_t = influx * _albedo(ds, influx) * (1.0 - cos(surface_slope)) / 2.0 - return ground_t.rename("ground tilted") + result: xr.DataArray = ground_t.rename("ground tilted") + return result def 
TiltedIrradiation( @@ -360,6 +364,7 @@ def clip(influx: xr.DataArray, influx_max: xr.DataArray) -> xr.DataArray: total_t = direct_t + diffuse_t + ground_t + result: xr.DataArray if irradiation == "total": result = total_t.rename("total tilted") elif irradiation == "direct": diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index 312761e6..97bd3cec 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -56,7 +56,8 @@ def _power_huld( da = G_ * eff * pc.get("inverter_efficiency", 1.0) da.attrs["units"] = "kWh/kWp" - return da.rename("specific generation") + result: xr.DataArray = da.rename("specific generation") + return result def _power_bofinger( @@ -92,7 +93,8 @@ def _power_bofinger( capacity = (pc["A"] + pc["B"] * 1000.0 + pc["C"] * np.log(1000.0)) * 1e3 power = irradiance * eta * (pc.get("inverter_efficiency", 1.0) / capacity) power = power.where(irradiance >= pc["threshold"], 0) - return power.rename("AC power") + result: xr.DataArray = power.rename("AC power") + return result def SolarPanelModel( diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index fd991780..5267739f 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -89,11 +89,10 @@ def SolarPosition(ds: xr.Dataset, time_shift: str | pd.Timedelta = "0H") -> xr.D # Operations make new DataArray eager; reconvert to lazy dask arrays chunks = ds.chunksizes.get("time", "auto") - if isinstance(chunks, tuple): - chunks = chunks[0] - n = n.chunk(chunks) - hour = hour.chunk(chunks) - minute = minute.chunk(chunks) + chunk_size = chunks[0] if isinstance(chunks, tuple) else chunks + n = n.chunk(chunk_size) + hour = hour.chunk(chunk_size) + minute = minute.chunk(chunk_size) L = 280.460 + 0.9856474 * n # mean longitude (deg) g = radians(357.528 + 0.9856003 * n) # mean anomaly (rad) diff --git a/atlite/resource.py b/atlite/resource.py index 201adbe4..a8258c56 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ 
-312,11 +312,12 @@ def windturbine_smooth( sigma: float = params.get("sigma", 2.29) def kernel(v_0: NDArray) -> NDArray: - return ( + result: NDArray = ( 1.0 / np.sqrt(2 * np.pi * sigma * sigma) * np.exp(-(v_0 - Delta_v) * (v_0 - Delta_v) / (2 * sigma * sigma)) ) + return result def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: # interpolate kernel and power curve to the same, regular velocity grid diff --git a/pyproject.toml b/pyproject.toml index b93ac850..f42c77b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ Documentation = "https://atlite.readthedocs.io/en/latest/" [project.optional-dependencies] -dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff", "mypy", "types-PyYAML"] +dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff", "mypy", "types-PyYAML", "types-requests"] docs = [ "numpydoc==1.8.0", diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 270fae91..97f17b64 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -4,6 +4,8 @@ """Tests for time aggregation functionality.""" +from typing import Any + import numpy as np import pandas as pd import pytest @@ -12,6 +14,12 @@ from atlite.convert import convert_and_aggregate +def _call(*args: Any, **kwargs: Any) -> xr.DataArray | xr.Dataset: + result = convert_and_aggregate(*args, **kwargs) + assert not isinstance(result, tuple) + return result + + class MockCutout: def __init__(self, data): self.data = data @@ -43,24 +51,24 @@ def cutout(): class TestAggregateTimeNoSpatial: def test_aggregate_time_none_returns_timeseries(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time=None) + result = _call(cutout, identity_convert, aggregate_time=None) assert "time" in result.dims def test_aggregate_time_mean(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time="mean") + result = _call(cutout, identity_convert, 
aggregate_time="mean") assert "time" not in result.dims expected = cutout.data["var"].mean("time") np.testing.assert_allclose(result.values, expected.values) def test_aggregate_time_sum(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time="sum") + result = _call(cutout, identity_convert, aggregate_time="sum") assert "time" not in result.dims expected = cutout.data["var"].sum("time") np.testing.assert_allclose(result.values, expected.values) def test_legacy_default_no_spatial_sums_over_time(self, cutout): with pytest.warns(FutureWarning, match="aggregate_time='legacy'"): - result = convert_and_aggregate(cutout, identity_convert) + result = _call(cutout, identity_convert) expected = cutout.data["var"].sum("time") assert "time" not in result.dims xr.testing.assert_identical(result, expected) @@ -77,14 +85,12 @@ def layout(cutout): @pytest.fixture def result_ts(cutout, layout): - return convert_and_aggregate( - cutout, identity_convert, layout=layout, aggregate_time=None - ) + return _call(cutout, identity_convert, layout=layout, aggregate_time=None) class TestAggregateTimeWithSpatial: def test_aggregate_time_mean_with_layout(self, cutout, layout, result_ts): - result_mean = convert_and_aggregate( + result_mean = _call( cutout, identity_convert, layout=layout, aggregate_time="mean" ) assert "time" in result_ts.dims @@ -92,7 +98,7 @@ def test_aggregate_time_mean_with_layout(self, cutout, layout, result_ts): np.testing.assert_allclose(result_mean.values, result_ts.mean("time").values) def test_aggregate_time_sum_with_layout(self, cutout, layout, result_ts): - result_sum = convert_and_aggregate( + result_sum = _call( cutout, identity_convert, layout=layout, aggregate_time="sum" ) assert "time" not in result_sum.dims @@ -100,7 +106,7 @@ def test_aggregate_time_sum_with_layout(self, cutout, layout, result_ts): def test_legacy_default_with_layout_returns_timeseries(self, cutout, layout): with pytest.warns(FutureWarning, 
match="aggregate_time='legacy'"): - result = convert_and_aggregate(cutout, identity_convert, layout=layout) + result = _call(cutout, identity_convert, layout=layout) assert "time" in result.dims def test_aggregate_time_with_per_unit(self, cutout): @@ -109,7 +115,7 @@ def test_aggregate_time_with_per_unit(self, cutout): dims=["y", "x"], coords={"y": cutout.data.y, "x": cutout.data.x}, ) - result_pu = convert_and_aggregate( + result_pu = _call( cutout, identity_convert, layout=layout, @@ -118,7 +124,7 @@ def test_aggregate_time_with_per_unit(self, cutout): ) assert "time" not in result_pu.dims - result_pu_ts = convert_and_aggregate( + result_pu_ts = _call( cutout, identity_convert, layout=layout, @@ -131,23 +137,19 @@ def test_aggregate_time_with_per_unit(self, cutout): class TestDeprecatedParams: def test_capacity_factor_warns(self, cutout): with pytest.warns(FutureWarning, match="capacity_factor is deprecated"): - result = convert_and_aggregate( - cutout, identity_convert, capacity_factor=True - ) + result = _call(cutout, identity_convert, capacity_factor=True) assert "time" not in result.dims def test_capacity_factor_timeseries_warns(self, cutout): with pytest.warns( FutureWarning, match="capacity_factor_timeseries is deprecated" ): - result = convert_and_aggregate( - cutout, identity_convert, capacity_factor_timeseries=True - ) + result = _call(cutout, identity_convert, capacity_factor_timeseries=True) assert "time" in result.dims def test_capacity_factor_with_aggregate_time_raises(self, cutout): with pytest.raises(ValueError, match="Cannot use"): - convert_and_aggregate( + _call( cutout, identity_convert, capacity_factor=True, @@ -158,12 +160,12 @@ def test_capacity_factor_with_aggregate_time_raises(self, cutout): class TestInvalidArgs: def test_invalid_aggregate_time_value(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") # type: ignore[arg-type] + 
_call(cutout, identity_convert, aggregate_time="invalid") def test_aggregate_time_false_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=False) # type: ignore[arg-type] + _call(cutout, identity_convert, aggregate_time=False) def test_aggregate_time_true_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=True) # type: ignore[arg-type] + _call(cutout, identity_convert, aggregate_time=True) From aa18981d238ece78c13a3ac3344a0d618418d085 Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 18 May 2026 10:55:57 +0200 Subject: [PATCH 26/27] Restore review-flagged comments/examples and drop ERA5RetrievalParams --- atlite/_types.py | 17 +------------ atlite/datasets/cordex.py | 2 ++ atlite/datasets/era5.py | 51 ++++++++++++++++++++++++++++----------- 3 files changed, 40 insertions(+), 30 deletions(-) diff --git a/atlite/_types.py b/atlite/_types.py index 8b8341e3..caa3538b 100644 --- a/atlite/_types.py +++ b/atlite/_types.py @@ -5,7 +5,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Literal, TypeAlias, TypedDict +from typing import Any, Literal, TypeAlias import numpy as np import xarray as xr @@ -29,18 +29,3 @@ HeatPumpSource: TypeAlias = Literal["air", "soil"] OrientationName: TypeAlias = Literal["latitude_optimal", "constant", "latitude"] DataFormat: TypeAlias = Literal["grib", "netcdf"] - - -class ERA5RetrievalParams(TypedDict, total=False): - product: str - area: list[float] - grid: str - chunks: dict[str, int] | None - tmpdir: str | Path | None - lock: Any | None - data_format: Literal["grib", "netcdf"] - year: list[str] - month: list[str] | str - day: list[str] | str - time: str | list[str] - variable: str | list[str] diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 9e26507f..cbf34133 100644 --- 
a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -97,12 +97,14 @@ def prepare_data_cordex( ds = ds.sel(x=xs, y=ys) if newname in {"influx", "outflux"}: + # shift averaged data to beginning of bin ds = ds.assign_coords( time=( pd.to_datetime(ds.coords["time"].values) - pd.Timedelta(hours=1.5) ) ) elif newname in {"runoff"}: + # shift and fill 6hr average data to beginning of 3hr bins t = pd.to_datetime(ds.coords["time"].values) ds = ds.reindex(method="bfill", time=(t - pd.Timedelta(hours=3.0)).union(t)) diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 2b349155..adafdb3b 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -35,7 +35,7 @@ from dask.utils import SerializableLock - from atlite._types import ERA5RetrievalParams, PathLike + from atlite._types import PathLike # Null context for running a with statements wihout any context try: @@ -129,7 +129,7 @@ def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dat return ds.drop_vars(["expver", "number"], errors="ignore") -def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: +def get_data_wind(retrieval_params: dict[str, Any]) -> xr.Dataset: """ Retrieve and compute wind speed variables from ERA5. @@ -138,7 +138,7 @@ def get_data_wind(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: Parameters ---------- - retrieval_params : ERA5RetrievalParams + retrieval_params : dict[str, Any] CDS API retrieval parameters including area, time, and format. Returns @@ -192,7 +192,7 @@ def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: return ds -def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: +def get_data_influx(retrieval_params: dict[str, Any]) -> xr.Dataset: """ Retrieve and compute solar radiation variables from ERA5. 
@@ -201,7 +201,7 @@ def get_data_influx(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: Parameters ---------- - retrieval_params : ERA5RetrievalParams + retrieval_params : dict[str, Any] CDS API retrieval parameters including area, time, and format. Returns @@ -271,7 +271,7 @@ def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: return ds -def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: +def get_data_temperature(retrieval_params: dict[str, Any]) -> xr.Dataset: """ Retrieve temperature variables from ERA5. @@ -279,7 +279,7 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: Parameters ---------- - retrieval_params : ERA5RetrievalParams + retrieval_params : dict[str, Any] CDS API retrieval parameters including area, time, and format. Returns @@ -304,13 +304,13 @@ def get_data_temperature(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: }) -def get_data_runoff(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: +def get_data_runoff(retrieval_params: dict[str, Any]) -> xr.Dataset: """ Retrieve runoff data from ERA5. Parameters ---------- - retrieval_params : ERA5RetrievalParams + retrieval_params : dict[str, Any] CDS API retrieval parameters including area, time, and format. Returns @@ -342,13 +342,13 @@ def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: return ds -def get_data_height(retrieval_params: ERA5RetrievalParams) -> xr.Dataset: +def get_data_height(retrieval_params: dict[str, Any]) -> xr.Dataset: """ Retrieve geopotential and convert to terrain height. Parameters ---------- - retrieval_params : ERA5RetrievalParams + retrieval_params : dict[str, Any] CDS API retrieval parameters including area, time, and format. Returns @@ -531,6 +531,10 @@ def open_with_grib_conventions( xr.Dataset Opened dataset with standardized dimensions. """ + # Open grib file as dataset. + # Options below normalize different ERA5 grib variants into consistent + # netCDF-compatible hypercubes. 
Options relevant only to e.g. wave-model + # data have been removed to keep this routine focused on the products we use. ds = xr.open_dataset( grib_file, engine="cfgrib", @@ -549,6 +553,13 @@ def open_with_grib_conventions( add_finalizer(ds, grib_file) def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Dataset: + """Expand missing dimensions while preserving their original order. + + Returns + ------- + xr.Dataset + Dataset with the requested dimensions present. + """ dims_required = [ c for c in dataset.coords if c in expand_dims + list(dataset.dims) ] @@ -608,6 +619,18 @@ def retrieve_data( xr.Dataset Downloaded ERA5 data. + Examples + -------- + >>> ds = retrieve_data( + ... product='reanalysis-era5-single-levels', + ... chunks={'time': 1, 'x': 100, 'y': 100}, + ... tmpdir='/tmp', + ... lock=None, + ... year='2020', + ... month='01', + ... variable=['10m_u_component_of_wind', '10m_v_component_of_wind'], + ... data_format='netcdf', + ... ) """ request: dict[str, Any] = { "product_type": ["reanalysis"], @@ -697,7 +720,7 @@ def get_data( sanitize = creation_parameters.get("sanitize", True) - retrieval_params: ERA5RetrievalParams = { + retrieval_params: dict[str, Any] = { "product": "reanalysis-era5-single-levels", "area": _area(coords), "chunks": cutout.chunks, @@ -707,7 +730,7 @@ def get_data( "data_format": data_format, } - func: Callable[[ERA5RetrievalParams], xr.Dataset] | None = globals().get( + func: Callable[[dict[str, Any]], xr.Dataset] | None = globals().get( f"get_data_{feature}" ) sanitize_func: Callable[[xr.Dataset], xr.Dataset] | None = globals().get( @@ -717,7 +740,7 @@ def get_data( logger.info("Requesting data for feature %s...", feature) def retrieve_once(time: dict[str, Any]) -> xr.Dataset: - ds = func({**retrieval_params, **time}) # type: ignore[misc, typeddict-item] + ds = func({**retrieval_params, **time}) # type: ignore[misc] if sanitize and sanitize_func is not None: ds = sanitize_func(ds) return ds From 
f27fda7eb94f73cfa7d38c50b25fbe78683c95ff Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 18 May 2026 11:39:05 +0200 Subject: [PATCH 27/27] Fix line_rating azimuth bug and address review feedback Pass line azimuth as degrees instead of radians to convert_line_rating, matching its documented psi unit. Trim _types.py aliases, drop redundant local annotations in data.py, simplify regrid() in gis.py, narrow _aggregate_time call site. Adds focused test for line_azimuth_degrees. --- RELEASE_NOTES.rst | 14 ++++++--- atlite/_types.py | 4 --- atlite/convert.py | 44 ++++++++++++++------------- atlite/cutout.py | 3 +- atlite/data.py | 51 +++++++++++++------------------- atlite/gis.py | 39 ++++++++++-------------- test/test_dynamic_line_rating.py | 20 ++++++++++++- 7 files changed, 89 insertions(+), 86 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1583d28c..80c8e5e6 100755 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -11,12 +11,18 @@ Release Notes Upcoming Release ================ -.. warning:: - - The features listed below are not released yet, but will be part of the next release! - To use the features already you have to install the ``master`` branch, e.g. +.. warning:: + + The features listed below are not released yet, but will be part of the next release! + To use the features already you have to install the ``master`` branch, e.g. ``pip install git+https://github.com/pypsa/atlite``. +**Bug fixes** + +* Fix ``Cutout.line_rating`` passing line azimuth in radians while + ``convert_line_rating`` interpreted ``psi`` as degrees. Azimuths are now + computed in degrees, matching the documented unit. 
+ `v0.6.1 `__ (21st April 2026) ======================================================================================= diff --git a/atlite/_types.py b/atlite/_types.py index caa3538b..123a007b 100644 --- a/atlite/_types.py +++ b/atlite/_types.py @@ -12,11 +12,7 @@ from pyproj import CRS NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] -NDArrayInt: TypeAlias = np.ndarray[Any, np.dtype[np.signedinteger[Any]]] -NDArrayBool: TypeAlias = np.ndarray[Any, np.dtype[np.bool_]] PathLike: TypeAlias = str | Path -NumericArray: TypeAlias = NDArray | xr.DataArray -Number: TypeAlias = int | float | np.number[Any] CrsLike: TypeAlias = str | int | CRS | dict[str, Any] ConvertResult: TypeAlias = ( xr.DataArray | xr.Dataset | tuple[xr.DataArray | xr.Dataset, xr.DataArray] diff --git a/atlite/convert.py b/atlite/convert.py index aed43e07..980c1137 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -224,7 +224,9 @@ def convert_and_aggregate( "given for `per_unit` or `return_capacity`" ) - agg = "sum" if aggregate_time == "legacy" else aggregate_time + agg: Literal["sum", "mean"] | None = ( + "sum" if aggregate_time == "legacy" else aggregate_time + ) res = _aggregate_time(da, agg) return maybe_progressbar(res, show_progress, **dask_kwargs) @@ -1765,6 +1767,25 @@ def convert_line_rating( return Imax.min("spatial") if isinstance(Imax, xr.DataArray) else Imax +def line_azimuth_degrees(shape: Any) -> float: + """ + Return the line azimuth in degrees, measured clockwise from north. + + Parameters + ---------- + shape : shapely.geometry.base.BaseGeometry + Line geometry with at least two coordinates. + + Returns + ------- + float + Azimuth in degrees in the range ``(-180, 180]``. 
+ """ + coords = np.array(shape.coords) + start, end = coords[0], coords[-1] + return float(np.degrees(np.arctan2(start[0] - end[0], start[1] - end[1]))) + + def line_rating( cutout, shapes, line_resistance, show_progress=False, dask_kwargs=None, **params ): @@ -1857,26 +1878,7 @@ def line_rating( data = cutout.data.stack(spatial=["y", "x"]) - def get_azimuth(shape): - """ - Return the line azimuth in degrees from its end points. - - Parameters - ---------- - shape : shapely.geometry.base.BaseGeometry - Line geometry. - - Returns - ------- - float - Azimuth angle in degrees computed from the line end points. - """ - coords = np.array(shape.coords) - start = coords[0] - end = coords[-1] - return np.degrees(np.arctan2(start[0] - end[0], start[1] - end[1])) - - azimuth = shapes.apply(get_azimuth) + azimuth = shapes.apply(line_azimuth_degrees) azimuth = azimuth.where(azimuth >= 0, azimuth + 180.0) params.setdefault("D", 0.028) diff --git a/atlite/cutout.py b/atlite/cutout.py index 59cacb7d..1ed5451f 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -38,7 +38,6 @@ from atlite._types import ( CrsLike, NDArray, - Number, PathLike, ) @@ -594,7 +593,7 @@ def uniform_layout(self) -> xr.DataArray: return xr.DataArray(1, [self.coords["y"], self.coords["x"]]) def uniform_density_layout( - self, capacity_density: Number, crs: CrsLike | None = None + self, capacity_density: float, crs: CrsLike | None = None ) -> xr.DataArray: """ Get a capacity layout from a uniform capacity density. diff --git a/atlite/data.py b/atlite/data.py index e5dfbe2b..2064a8dd 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -13,7 +13,6 @@ from tempfile import mkdtemp, mkstemp from typing import TYPE_CHECKING, Any -import numpy as np import pandas as pd import xarray as xr from dask import compute as dask_compute @@ -67,13 +66,13 @@ def get_features( xarray.Dataset Merged dataset containing the requested features. 
""" - parameters: dict[str, Any] = cutout.data.attrs - lock: SerializableLock = SerializableLock() - datasets: list[Any] = [] - get_data: Callable[..., Any] = datamodules[module].get_data + parameters = cutout.data.attrs + lock = SerializableLock() + datasets = [] + get_data = datamodules[module].get_data for feature in features: - feature_data: Any = delayed(get_data)( + feature_data = delayed(get_data)( cutout, feature, tmpdir=tmpdir, @@ -89,14 +88,13 @@ def get_features( ds: xr.Dataset = xr.merge(datasets, compat="equals") for v in ds: - da: xr.DataArray = ds[v] + da = ds[v] da.attrs["module"] = module - fd: Iterable[tuple[str, Any]] = datamodules[module].features.items() + fd = datamodules[module].features.items() da.attrs["feature"] = [k for k, l in fd if v in l].pop() if da.chunks is not None: - chunksizes: list[int] = [c[0] for c in da.chunks] - da.encoding["chunksizes"] = chunksizes + da.encoding["chunksizes"] = [c[0] for c in da.chunks] return ds @@ -119,8 +117,8 @@ def available_features(module: str | Sequence[str] | None = None) -> pd.Series[s obtained. 
""" - features: dict[str, Any] = {name: m.features for name, m in datamodules.items()} - features_frame: pd.Series[Any] = ( + features = {name: m.features for name, m in datamodules.items()} + features_frame = ( pd .DataFrame(features) .unstack() @@ -268,33 +266,28 @@ def cutout_prepare( if tmpdir is None: raise ValueError("tmpdir cannot be None") - temp_dir_path: Path = Path(tmpdir) + temp_dir_path = Path(tmpdir) if not temp_dir_path.is_dir(): raise FileNotFoundError(f"The tmpdir: {temp_dir_path} does not exist.") logger.info("Storing temporary files in %s", tmpdir) - modules_array: np.ndarray[Any, np.dtype[Any]] = atleast_1d(cutout.module) - modules_list: list[str] = modules_array.tolist() - features_normalized: np.ndarray[Any, np.dtype[Any]] | slice = ( - atleast_1d(features) if features else slice(None) - ) - prepared: set[str] = set(atleast_1d(cutout.data.attrs["prepared_features"])) + modules_list = atleast_1d(cutout.module).tolist() + features_normalized = atleast_1d(features) if features else slice(None) + prepared = set(atleast_1d(cutout.data.attrs["prepared_features"])) - target: pd.Series[str] = ( + target = ( available_features(modules_list).loc[:, features_normalized].drop_duplicates() ) for module in target.index.unique("module"): - missing_vars: pd.Series[str] = target[module] + missing_vars = target[module] if not overwrite: missing_vars = missing_vars[lambda v: ~v.isin(cutout.data)] if missing_vars.empty: continue logger.info("Calculating and writing with module %s:", module) - missing_features: np.ndarray[Any, np.dtype[Any]] = missing_vars.index.unique( - "feature" - ) - ds: xr.Dataset = get_features( + missing_features = missing_vars.index.unique("feature") + ds = get_features( cutout, module, missing_features, @@ -306,7 +299,7 @@ def cutout_prepare( prepared |= set(missing_features) cutout.data.attrs.update({"prepared_features": list(prepared)}) - attrs: dict[str, Any] = non_bool_dict(cutout.data.attrs) + attrs = non_bool_dict(cutout.data.attrs) 
attrs.update(ds.attrs) if compression: @@ -315,16 +308,12 @@ def cutout_prepare( ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs) - directory: str - filename: str directory, filename = os.path.split(str(cutout.path)) - fd: int - tmp: str fd, tmp = mkstemp(suffix=filename, dir=directory) os.close(fd) logger.debug("Writing cutout to file...") - write_job: Any = ds.to_netcdf(tmp, compute=False) + write_job = ds.to_netcdf(tmp, compute=False) if show_progress: with ProgressBar(minimum=2): write_job.compute(**dask_kwargs) diff --git a/atlite/gis.py b/atlite/gis.py index 3e91a5fb..cd45f85f 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -1074,27 +1074,20 @@ def _reproject(src: NDArray, **kwargs: Any) -> NDArray: dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "regrid can only reproject datasets with homogeneous dtype" - return cast( - "xr.Dataset | xr.DataArray", - ( - xr - .apply_ufunc( - _reproject, - ds, - input_core_dims=[[namey, namex]], - output_core_dims=[["yout", "xout"]], - output_dtypes=[dtypes.pop()], - dask_gufunc_kwargs={ - "output_sizes": {"yout": dst_shape[0], "xout": dst_shape[1]} - }, - dask="parallelized", - kwargs=kwargs, - ) - .rename({"yout": namey, "xout": namex}) - .assign_coords(**{ - namey: (namey, dimy.data, ds.coords[namey].attrs), - namex: (namex, dimx.data, ds.coords[namex].attrs), - }) - .assign_attrs(**ds.attrs) - ), + result = xr.apply_ufunc( + _reproject, + ds, + input_core_dims=[[namey, namex]], + output_core_dims=[["yout", "xout"]], + output_dtypes=[dtypes.pop()], + dask_gufunc_kwargs={ + "output_sizes": {"yout": dst_shape[0], "xout": dst_shape[1]} + }, + dask="parallelized", + kwargs=kwargs, ) + result = result.rename({"yout": namey, "xout": namex}).assign_coords({ + namey: (namey, dimy.data, ds.coords[namey].attrs), + namex: (namex, dimx.data, ds.coords[namex].attrs), + }) + return cast("xr.Dataset | xr.DataArray", result.assign_attrs(**ds.attrs)) diff --git a/test/test_dynamic_line_rating.py 
b/test/test_dynamic_line_rating.py index 3532a415..e0e0b1ca 100644 --- a/test/test_dynamic_line_rating.py +++ b/test/test_dynamic_line_rating.py @@ -11,8 +11,10 @@ import numpy as np import pandas as pd +import pytest +from shapely.geometry import LineString, Point -from atlite.convert import convert_line_rating +from atlite.convert import convert_line_rating, line_azimuth_degrees def test_ieee_sample_case(): @@ -176,3 +178,19 @@ def func(psi): # check point reflection assert np.isclose(res.iloc[:19], res.iloc[:17:-1], atol=1e-10, rtol=1e-10).all() assert np.isclose(res.iloc[:19], res.iloc[18:], atol=1e-10, rtol=1e-10).all() + + +@pytest.mark.parametrize( + ("start", "end", "expected"), + [ + ((0.0, 0.0), (0.0, 1.0), 180.0), # N-pointing line + ((0.0, 0.0), (0.0, -1.0), 0.0), # S-pointing line + ((0.0, 0.0), (1.0, 0.0), -90.0), # E-pointing + ((0.0, 0.0), (-1.0, 0.0), 90.0), # W-pointing + ((0.0, 0.0), (1.0, 1.0), -135.0), # NE diagonal + ], +) +def test_line_azimuth_degrees(start, end, expected): + """`line_azimuth_degrees` returns degrees consistent with `convert_line_rating`'s `psi`.""" + shape = LineString([Point(*start), Point(*end)]) + assert np.isclose(line_azimuth_degrees(shape), expected)