diff --git a/.gitignore b/.gitignore index c67bdd8a..23e0ac9d 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ examples/*.nc examples/*.csv examples/*.zip examples/*.tif +benchmarks/ atlite/version.py paper .coverage* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b0a5b1c..0a810112 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,13 @@ repos: # Run the formatter. - id: ruff-format +# Type checking with mypy +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.15.0 + hooks: + - id: mypy + additional_dependencies: ['types-PyYAML', 'types-requests'] + # Find common spelling mistakes in comments and docstrings - repo: https://github.com/codespell-project/codespell rev: v2.4.2 diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1583d28c..80c8e5e6 100755 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -11,12 +11,18 @@ Release Notes Upcoming Release ================ -.. warning:: - - The features listed below are not released yet, but will be part of the next release! - To use the features already you have to install the ``master`` branch, e.g. +.. warning:: + + The features listed below are not released yet, but will be part of the next release! + To use the features already you have to install the ``master`` branch, e.g. ``pip install git+https://github.com/pypsa/atlite``. +**Bug fixes** + +* Fix ``Cutout.line_rating`` passing line azimuth in radians while + ``convert_line_rating`` interpreted ``psi`` as degrees. Azimuths are now + computed in degrees, matching the documented unit. + `v0.6.1 `__ (21st April 2026) ======================================================================================= diff --git a/atlite/__init__.py b/atlite/__init__.py index d21aaf5c..03a78626 100644 --- a/atlite/__init__.py +++ b/atlite/__init__.py @@ -33,16 +33,16 @@ __version__ = version("atlite") # e.g. "0.17.0" # TODO, in the network structure it should use the dev version match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__) -assert match, f"Could not determine release_version of pypsa: {__version__}" +assert match, f"Could not determine release_version of atlite: {__version__}" release_version = match.group(0) __all__ = [ - Cutout, - ExclusionContainer, - compute_indicatormatrix, - regrid, - cspinstallations, - solarpanels, - windturbines, - __version__, + "Cutout", + "ExclusionContainer", + "compute_indicatormatrix", + "regrid", + "cspinstallations", + "solarpanels", + "windturbines", + "__version__", ] diff --git a/atlite/_types.py b/atlite/_types.py new file mode 100644 index 00000000..123a007b --- /dev/null +++ b/atlite/_types.py @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Contributors to atlite +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Literal, TypeAlias + +import numpy as np +import xarray as xr +from pyproj import CRS + +NDArray: TypeAlias = np.ndarray[Any, np.dtype[np.floating[Any]]] +PathLike: TypeAlias = str | Path +CrsLike: TypeAlias = str | int | CRS | dict[str, Any] +ConvertResult: TypeAlias = ( + xr.DataArray | xr.Dataset | tuple[xr.DataArray | xr.Dataset, xr.DataArray] +) + +TrackingType: TypeAlias = Literal["horizontal", "tilted_horizontal", "vertical", "dual"] +ClearskyModel: TypeAlias = Literal["simple", "enhanced"] +TrigonModel: TypeAlias = Literal["simple", "perez"] +IrradiationType: TypeAlias = Literal["total", "direct", "diffuse", "ground"] +HeatPumpSource: TypeAlias = Literal["air", "soil"] +OrientationName: TypeAlias = Literal["latitude_optimal", "constant", "latitude"] +DataFormat: TypeAlias = Literal["grib", "netcdf"] diff --git a/atlite/aggregate.py b/atlite/aggregate.py index eb100260..cc14326f 100644 --- a/atlite/aggregate.py +++ b/atlite/aggregate.py @@ -1,35 +1,59 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for aggregating results. -""" +"""Functions for aggregating results.""" +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +import dask import pandas as pd -import scipy.sparse as sp import xarray as xr -from dask.array.core import Array from atlite.utils import ensure_coords +if TYPE_CHECKING: + import pandas as pd + from scipy.sparse import spmatrix + def aggregate_matrix( - da: xr.DataArray, matrix: sp.csr_matrix, index: xr.Coordinates | pd.Index + da: xr.DataArray, + matrix: spmatrix, + index: xr.Coordinates | pd.Index, ) -> xr.DataArray: + """ + Aggregate spatial data with a sparse matrix. + + Parameters + ---------- + da : xarray.DataArray + DataArray with spatial dimensions ``y`` and ``x``. + matrix : scipy.sparse.spmatrix + Aggregation matrix mapping flattened spatial cells to ``index``. + index : xarray.Coordinates | pandas.Index + Index defining the aggregated dimension. + + Returns + ------- + xarray.DataArray + Aggregated data indexed by ``index`` and, if present, time. + """ coords = ensure_coords(index) - if isinstance(da.data, Array): + if isinstance(da.data, dask.array.core.Array): da = da.stack(spatial=("y", "x")) - da = da.chunk(dict(spatial=-1)) - return xr.apply_ufunc( + da = da.chunk({"spatial": -1}) + result = xr.apply_ufunc( lambda da: da * matrix.T, da, input_core_dims=[["spatial"]], output_core_dims=[list(coords.dims)], dask="parallelized", output_dtypes=[da.dtype], - dask_gufunc_kwargs=dict(output_sizes=coords.sizes), + dask_gufunc_kwargs={"output_sizes": coords.sizes}, ).assign_coords(coords) - else: - da = da.stack(spatial=("y", "x")).transpose("spatial", "time") - return xr.DataArray(matrix * da, coords.assign(time=da.coords["time"])) + return cast("xr.DataArray", result) + da = da.stack(spatial=("y", "x")).transpose("spatial", "time") + return xr.DataArray(matrix * da, [index, da.coords["time"]]) diff --git a/atlite/convert.py b/atlite/convert.py index fb626e9b..980c1137 100644 --- a/atlite/convert.py +++ b/atlite/convert.py @@ -1,19 +1,16 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -All functions for converting weather data into energy system model data. -""" +"""All functions for converting weather data into energy system model data.""" from __future__ import annotations import datetime as dt import logging import warnings -from collections import namedtuple from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal import geopandas as gpd import numpy as np @@ -45,34 +42,48 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: - from atlite.resource import TurbineConfig + from collections.abc import Callable + + from atlite._types import ( + ClearskyModel, + ConvertResult, + HeatPumpSource, + IrradiationType, + OrientationName, + TrackingType, + TrigonModel, + ) + from atlite.cutout import Cutout + from atlite.resource import CSPConfig, PanelConfig, TurbineConfig -def _aggregate_time(da: xr.DataArray, method: str | None) -> xr.DataArray: +def _aggregate_time( + da: xr.DataArray, method: Literal["sum", "mean"] | None +) -> xr.DataArray: if method == "sum": return da.sum("time", keep_attrs=True) - elif method == "mean": + if method == "mean": return da.mean("time", keep_attrs=True) return da def convert_and_aggregate( - cutout, - convert_func, - matrix=None, - index=None, - layout=None, - shapes=None, - shapes_crs=4326, - per_unit=False, - return_capacity=False, + cutout: Cutout, + convert_func: Callable[..., Any], + matrix: Any = None, + index: Any = None, + layout: Any = None, + shapes: Any = None, + shapes_crs: int = 4326, + per_unit: bool = False, + return_capacity: bool = False, aggregate_time: Literal["sum", "mean", "legacy"] | None = "legacy", - capacity_factor=False, - capacity_factor_timeseries=False, - show_progress=False, - dask_kwargs={}, - **convert_kwds, -): + capacity_factor: bool = False, + capacity_factor_timeseries: bool = False, + show_progress: bool = False, + dask_kwargs: dict[str, Any] | None = None, + **convert_kwds: Any, +) -> ConvertResult: """ Convert and aggregate a weather-based renewable generation time-series. @@ -82,6 +93,10 @@ def convert_and_aggregate( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. + convert_func : callable + Callback like convert_wind, convert_pv. matrix : N x S - xr.DataArray or sp.sparse.csr_matrix or None If given, it is used to aggregate the grid cells to buses. N is the number of buses, S the number of spatial coordinates, in the @@ -118,11 +133,8 @@ def convert_and_aggregate( Whether to show a progress bar. dask_kwargs : dict, default {} Dict with keyword arguments passed to ``dask.compute``. - - Other Parameters - ---------------- - convert_func : Function - Callback like convert_wind, convert_pv + **convert_kwds : Any + Additional keyword arguments passed to ``convert_func``. Returns ------- @@ -151,6 +163,11 @@ def convert_and_aggregate( The installed units per bus in MW corresponding to ``layout`` (only if ``return_capacity`` is True). + Raises + ------ + ValueError + If deprecated parameters conflict or invalid arguments are provided. + See Also -------- wind : Generate wind generation time-series. @@ -194,9 +211,10 @@ def convert_and_aggregate( aggregate_time = None func_name = convert_func.__name__.replace("convert_", "") - logger.info(f"Convert and aggregate '{func_name}'.") + logger.info("Convert and aggregate '%s'.", func_name) da = convert_func(cutout.data, **convert_kwds) + dask_kwargs = dask_kwargs or {} no_args = all(v is None for v in [layout, shapes, matrix]) if no_args: @@ -206,7 +224,9 @@ def convert_and_aggregate( "given for `per_unit` or `return_capacity`" ) - agg = "sum" if aggregate_time == "legacy" else aggregate_time + agg: Literal["sum", "mean"] | None = ( + "sum" if aggregate_time == "legacy" else aggregate_time + ) res = _aggregate_time(da, agg) return maybe_progressbar(res, show_progress, **dask_kwargs) @@ -272,13 +292,28 @@ def convert_and_aggregate( if return_capacity: return maybe_progressbar(results, show_progress, **dask_kwargs), capacity - else: - return maybe_progressbar(results, show_progress, **dask_kwargs) + return maybe_progressbar(results, show_progress, **dask_kwargs) -def maybe_progressbar(ds, show_progress, **kwargs): +def maybe_progressbar( + ds: xr.Dataset | xr.DataArray, show_progress: bool, **kwargs: Any +) -> xr.Dataset | xr.DataArray: """ - Load a xr.dataset with dask arrays either with or without progressbar. + Load a dataset or data array, optionally showing a dask progress bar. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Object backed by dask arrays. + show_progress : bool + Whether to display a progress bar while loading. + **kwargs + Keyword arguments passed to ``load``. + + Returns + ------- + xr.Dataset or xr.DataArray + Loaded object. """ if show_progress: with ProgressBar(minimum=2): @@ -289,24 +324,63 @@ def maybe_progressbar(ds, show_progress, **kwargs): # temperature -def convert_temperature(ds): +def convert_temperature(ds: xr.Dataset) -> xr.DataArray: """ - Return outside temperature (useful for e.g. heat pump T-dependent - coefficient of performance). + Convert ambient air temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + + Returns + ------- + xr.DataArray + Ambient temperature in degrees Celsius. """ # Temperature is in Kelvin return ds["temperature"] - 273.15 -def temperature(cutout, **params): +def temperature(cutout: Cutout, **params: Any) -> ConvertResult: + """ + Return ambient air temperature converted from Kelvin to degrees Celsius. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Ambient temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + """ return cutout.convert_and_aggregate(convert_func=convert_temperature, **params) # soil temperature -def convert_soil_temperature(ds): +def convert_soil_temperature(ds: xr.Dataset) -> xr.DataArray: """ - Return soil temperature (useful for e.g. heat pump T-dependent coefficient - of performance). + Convert soil temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``soil temperature``. + + Returns + ------- + xr.DataArray + Soil temperature in degrees Celsius with missing values filled by zero. """ # Temperature is in Kelvin @@ -316,26 +390,108 @@ def convert_soil_temperature(ds): return (ds["soil temperature"] - 273.15).fillna(0.0) -def soil_temperature(cutout, **params): +def soil_temperature(cutout: Cutout, **params: Any) -> ConvertResult: + """ + Return soil temperature converted from Kelvin to degrees Celsius. + + Sea grid cells, where soil temperature is undefined, are filled with 0.0 + so they do not contribute during spatial aggregation. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Soil temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + """ return cutout.convert_and_aggregate(convert_func=convert_soil_temperature, **params) # dewpoint temperature -def convert_dewpoint_temperature(ds): +def convert_dewpoint_temperature(ds: xr.Dataset) -> xr.DataArray: """ - Return dewpoint temperature. + Convert dew point temperature from Kelvin to degrees Celsius. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``dewpoint temperature``. + + Returns + ------- + xr.DataArray + Dew point temperature in degrees Celsius. """ # Temperature is in Kelvin return ds["dewpoint temperature"] - 273.15 -def dewpoint_temperature(cutout, **params): +def dewpoint_temperature(cutout: Cutout, **params: Any) -> ConvertResult: + """ + Return dew point temperature converted from Kelvin to degrees Celsius. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Dew point temperature in °C. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + """ return cutout.convert_and_aggregate( convert_func=convert_dewpoint_temperature, **params ) -def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): +def convert_coefficient_of_performance( + ds: xr.Dataset, + source: HeatPumpSource, + sink_T: float, + c0: float | None, + c1: float | None, + c2: float | None, +) -> xr.DataArray: + """ + Convert source temperatures to heat pump COP values. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing the required temperature variables. + source : {"air", "soil"} + Heat source used for the heat pump. + sink_T : float + Sink temperature in degrees Celsius. + c0, c1, c2 : float or None + Quadratic regression coefficients. If ``None``, source-specific + defaults are used. + + Returns + ------- + xr.DataArray + Coefficient of performance for each time step and grid cell. + """ assert source in ["air", "soil"], NotImplementedError( "'source' must be one of ['air', 'soil']" ) @@ -363,29 +519,57 @@ def convert_coefficient_of_performance(ds, source, sink_T, c0, c1, c2): def coefficient_of_performance( - cutout, source="air", sink_T=55.0, c0=None, c1=None, c2=None, **params -): + cutout: Cutout, + source: HeatPumpSource = "air", + sink_T: float = 55.0, + c0: float | None = None, + c1: float | None = None, + c2: float | None = None, + **params: Any, +) -> ConvertResult: """ - Convert ambient or soil temperature to coefficient of performance (COP) of - air- or ground-sourced heat pumps. The COP is a function of temperature - difference from source to sink. The defaults for either source (c0, c1, c2) - are based on a quadratic regression in [1]. - - Paramterers - ----------- - source : str - The heat source. Can be 'air' or 'soil'. - sink_T : float - The temperature of the heat sink. - c0 : float - The constant regression coefficient for the temperature difference. - c1 : float - The linear regression coefficient for the temperature difference. - c2 : float - The quadratic regression coefficient for the temperature difference. - - Reference - --------- + Convert temperature to heat pump coefficient of performance (COP). + + The COP is modelled as a quadratic function of the temperature difference + ``dT = sink_T - source_T``: ``COP = c0 + c1 * dT + c2 * dT**2``. + + Parameters + ---------- + cutout : atlite.Cutout + The cutout to process. + source : {"air", "soil"} + Heat source type. Default coefficients per source: + + - ``"air"``: ``c0=6.81, c1=-0.121, c2=0.000630`` + - ``"soil"``: ``c0=8.77, c1=-0.150, c2=0.000734`` + sink_T : float, default 55.0 + Heat sink temperature in °C. + c0 : float or None + Constant regression coefficient. If ``None``, uses source default. + c1 : float or None + Linear regression coefficient. If ``None``, uses source default. + c2 : float or None + Quadratic regression coefficient. If ``None``, uses source default. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Coefficient of performance time-series (dimensionless). + + See Also + -------- + heat_demand : Compute heating degree-day demand. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + + References + ---------- [1] Staffell, Brett, Brandon, Hawkes, A review of domestic heat pumps, Energy & Environmental Science (2012), 5, 9291-9306, https://doi.org/10.1039/C2EE22653G. @@ -402,7 +586,39 @@ def coefficient_of_performance( # heat demand -def convert_heat_demand(ds, threshold, a, constant, hour_shift): +def convert_heat_demand( + ds: xr.Dataset, + threshold: float, + a: float, + constant: float, + hour_shift: float, +) -> xr.DataArray: + """ + Convert ambient temperature to daily heat demand by degree days. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + threshold : float + Heating threshold temperature in degrees Celsius. + a : float + Linear scaling factor. + constant : float + Constant demand component added to the result. + hour_shift : float + Time shift in hours applied before daily averaging. + + Returns + ------- + xr.DataArray + Daily heat demand in degree-day-like units. + + Notes + ----- + The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. + """ # Temperature is in Kelvin; take daily average T = ds["temperature"] T = T.assign_coords( @@ -418,48 +634,63 @@ def convert_heat_demand(ds, threshold, a, constant, hour_shift): return (constant + heat_demand).rename("heat_demand") -def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **params): +def heat_demand( + cutout: Cutout, + threshold: float = 15.0, + a: float = 1.0, + constant: float = 0.0, + hour_shift: float = 0.0, + **params: Any, +) -> ConvertResult: """ - Convert outside temperature into daily heat demand using the degree-day - approximation. + Convert outside temperature into daily heat demand using degree-day approximation. - Since "daily average temperature" means different things in - different time zones and since xarray coordinates do not handle - time zones gracefully like pd.DateTimeIndex, you can provide an - hour_shift to redefine when the day starts. + The formula is ``max(0, a * (threshold - T_daily_mean)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. Output is in + degree-day-like units (scaled by *a*). - E.g. for Moscow in winter, hour_shift = 4, for New York in winter, - hour_shift = -5 - - This time shift applies across the entire spatial scope of ds for - all times. More fine-grained control will be built in a some - point, i.e. space- and time-dependent time zones. - - WARNING: Because the original data is provided every month, at the - month boundaries there is untidiness if you use a time shift. The - resulting xarray will have duplicates in the index for the parts - of the day in each month at the boundary. You will have to - re-average these based on the number of hours in each month for - the duplicated day. + Since "daily average temperature" means different things in different time + zones, you can provide *hour_shift* to redefine when the day starts. + E.g. for Moscow in winter ``hour_shift=4``, for New York ``hour_shift=-5``. + The shift applies uniformly across all grid cells and times. Parameters ---------- - threshold : float - Outside temperature in degrees Celsius above which there is no - heat demand. - a : float + cutout : atlite.Cutout + The cutout to process. + threshold : float, default 15.0 + Outside temperature in °C above which there is no heat demand. + a : float, default 1.0 Linear factor relating heat demand to outside temperature. - constant : float - Constant part of heat demand that does not depend on outside - temperature (e.g. due to water heating). - hour_shift : float - Time shift relative to UTC for taking daily average + constant : float, default 0.0 + Constant part of heat demand independent of outside temperature + (e.g. water heating). + hour_shift : float, default 0.0 + Time shift in hours relative to UTC for daily averaging. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Daily heat demand time-series in degree-day-like units. + + Warnings + -------- + Because the original data is provided per month, at month boundaries + there is untidiness when using a time shift. The resulting array will + have duplicate indices for parts of the day at each boundary. You must + re-average these based on the number of hours in each month. + + See Also + -------- + cooling_demand : Degree-day cooling demand. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. """ return cutout.convert_and_aggregate( convert_func=convert_heat_demand, @@ -472,7 +703,39 @@ def heat_demand(cutout, threshold=15.0, a=1.0, constant=0.0, hour_shift=0.0, **p # cooling demand -def convert_cooling_demand(ds, threshold, a, constant, hour_shift): +def convert_cooling_demand( + ds: xr.Dataset, + threshold: float, + a: float, + constant: float, + hour_shift: float, +) -> xr.DataArray: + """ + Convert ambient temperature to daily cooling demand by degree days. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``temperature``. + threshold : float + Cooling threshold temperature in degrees Celsius. + a : float + Linear scaling factor. + constant : float + Constant demand component added to the result. + hour_shift : float + Time shift in hours applied before daily averaging. + + Returns + ------- + xr.DataArray + Daily cooling demand in degree-day-like units. + + Notes + ----- + The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. + """ # Temperature is in Kelvin; take daily average T = ds["temperature"] T = T.assign_coords( @@ -489,52 +752,64 @@ def convert_cooling_demand(ds, threshold, a, constant, hour_shift): def cooling_demand( - cutout, threshold=23.0, a=1.0, constant=0.0, hour_shift=0.0, **params -): + cutout: Cutout, + threshold: float = 23.0, + a: float = 1.0, + constant: float = 0.0, + hour_shift: float = 0.0, + **params: Any, +) -> ConvertResult: """ - Convert outside temperature into daily cooling demand using the degree-day - approximation. + Convert outside temperature into daily cooling demand using degree-day approximation. - Since "daily average temperature" means different things in - different time zones and since xarray coordinates do not handle - time zones gracefully like pd.DateTimeIndex, you can provide an - hour_shift to redefine when the day starts. + The formula is ``max(0, a * (T_daily_mean - threshold)) + constant`` + where ``T_daily_mean`` is the daily-averaged temperature. Output is in + degree-day-like units (scaled by *a*). - E.g. for Moscow in summer, hour_shift = 3, for New York in summer, - hour_shift = -4 - - This time shift applies across the entire spatial scope of ds for - all times. More fine-grained control will be built in a some - point, i.e. space- and time-dependent time zones. - - WARNING: Because the original data is provided every month, at the - month boundaries there is untidiness if you use a time shift. The - resulting xarray will have duplicates in the index for the parts - of the day in each month at the boundary. You will have to - re-average these based on the number of hours in each month for - the duplicated day. + Since "daily average temperature" means different things in different time + zones, you can provide *hour_shift* to redefine when the day starts. + E.g. for Moscow in summer ``hour_shift=3``, for New York ``hour_shift=-4``. + The shift applies uniformly across all grid cells and times. Parameters ---------- - threshold : float - Outside temperature in degrees Celsius below which there is no - cooling demand. The default 23C is taken as a more liberal - estimation following European computational practices - (e.g. UK Met Office and European commission take as thresholds - 22C and 24C, respectively) - a : float + cutout : atlite.Cutout + The cutout to process. + threshold : float, default 23.0 + Outside temperature in °C below which there is no cooling demand. + The default follows European computational practices (UK Met Office + uses 22 °C, European Commission uses 24 °C). + a : float, default 1.0 Linear factor relating cooling demand to outside temperature. - constant : float - Constant part of cooling demand that does not depend on outside - temperature (e.g. due to ventilation). - hour_shift : float - Time shift relative to UTC for taking daily average + constant : float, default 0.0 + Constant part of cooling demand independent of outside temperature + (e.g. ventilation). + hour_shift : float, default 0.0 + Time shift in hours relative to UTC for daily averaging. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Daily cooling demand time-series in degree-day-like units. + + Warnings + -------- + Because the original data is provided per month, at month boundaries + there is untidiness when using a time shift. The resulting array will + have duplicate indices for parts of the day at each boundary. You must + re-average these based on the number of hours in each month. + + See Also + -------- + heat_demand : Degree-day heating demand. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. - + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. """ return cutout.convert_and_aggregate( convert_func=convert_cooling_demand, @@ -548,8 +823,42 @@ def cooling_demand( # solar thermal collectors def convert_solar_thermal( - ds, orientation, trigon_model, clearsky_model, c0, c1, t_store -): + ds: xr.Dataset, + orientation: Callable, + trigon_model: TrigonModel, + clearsky_model: ClearskyModel | None, + c0: float, + c1: float, + t_store: float, +) -> xr.DataArray: + """ + Convert weather data to solar thermal collector output. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and temperature variables. + orientation : callable + Surface orientation callback. + trigon_model : str + Trigonometric irradiation model. + clearsky_model : str or None + Clear-sky model used for diffuse irradiation. + c0, c1 : float + Collector efficiency parameters. + t_store : float + Storage temperature in degrees Celsius. + + Returns + ------- + xr.DataArray + Specific solar thermal output in W/m². + + Notes + ----- + Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G`` where + *G* is the tilted irradiation. Output is ``max(0, G * eta)``. + """ # convert storage temperature to Kelvin in line with reanalysis data t_store += 273.15 @@ -573,48 +882,68 @@ def convert_solar_thermal( def solar_thermal( - cutout, - orientation={"slope": 45.0, "azimuth": 180.0}, - trigon_model="simple", - clearsky_model="simple", - c0=0.8, - c1=3.0, - t_store=80.0, - **params, -): + cutout: Cutout, + orientation: OrientationName | dict[str, float] | Callable | None = None, + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel = "simple", + c0: float = 0.8, + c1: float = 3.0, + t_store: float = 80.0, + **params: Any, +) -> ConvertResult: """ - Convert downward short-wave radiation flux and outside temperature into - time series for solar thermal collectors. + Convert radiation and temperature into solar thermal collector time series. - Mathematical model and defaults for c0, c1 based on model in [1]. + Collector efficiency is ``eta = c0 - c1 * (T_store - T_amb) / G``. + Mathematical model and defaults for *c0*, *c1* based on [1]. Parameters ---------- - cutout : cutout - orientation : dict or str or function - Panel orientation with slope and azimuth (units of degrees), or - 'latitude_optimal'. - trigon_model : str - Type of trigonometry model - clearsky_model : str or None - Type of clearsky model for diffuse irradiation. Either - 'simple' or 'enhanced'. - c0, c1 : float - Parameters for model in [1] (defaults to 0.8 and 3., respectively) - t_store : float - Store temperature in degree Celsius + cutout : atlite.Cutout + The cutout to process. + orientation : dict, str, or callable, optional + Panel orientation. A dict with ``'slope'`` and ``'azimuth'`` keys + in degrees, the string ``'latitude_optimal'``, or a callable with + the same signature as callbacks from + ``atlite.pv.orientation.make_*``. Default: ``{'slope': 45.0, + 'azimuth': 180.0}``. + trigon_model : {"simple", "perez"}, default "simple" + Trigonometric model for tilted irradiation decomposition. + clearsky_model : {"simple", "enhanced"} or None, default "simple" + Clear-sky model for diffuse irradiation. ``'enhanced'`` also uses + ambient temperature and relative humidity. + c0 : float, default 0.8 + Optical efficiency parameter. + c1 : float, default 3.0 + Thermal loss coefficient in W/(m² K). + t_store : float, default 80.0 + Storage temperature in °C. + **params : Any + Additional keyword arguments passed to + :py:func:`convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Solar thermal generation time-series. + + See Also + -------- + pv : Photovoltaic generation. + irradiation : Tilted surface irradiation. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- [1] Henning and Palzer, Renewable and Sustainable Energy Reviews 30 (2014) 1003-1018 - """ + if orientation is None: + orientation = {"slope": 45.0, "azimuth": 180.0} if not callable(orientation): orientation = get_orientation(orientation) @@ -635,9 +964,23 @@ def convert_wind( ds: xr.Dataset, turbine: TurbineConfig, interpolation_method: Literal["logarithmic", "power"], -) -> xr.DataArray: +) -> xr.Dataset | xr.DataArray: """ - Convert wind speeds for turbine to wind energy generation. + Convert wind speeds to turbine-specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing wind speed data. + turbine : TurbineConfig + Turbine configuration with power curve and hub height. + interpolation_method : {"logarithmic", "power"} + Method used to extrapolate wind speed to hub height. + + Returns + ------- + xr.DataArray + Wind power output as specific yield per unit of installed capacity. """ V, POW, hub_height, P = itemgetter("V", "POW", "hub_height", "P")(turbine) @@ -656,20 +999,20 @@ def apply_power_curve(da): output_dtypes=[wnd_hub.dtype], dask="parallelized", ) + assert isinstance(da, xr.DataArray) da.attrs["units"] = "MWh/MWp" - da = da.rename("specific generation") - return da + return da.rename("specific generation") def wind( - cutout, - turbine: str | Path | dict, - smooth: bool | dict = False, + cutout: Cutout, + turbine: str | Path | dict[str, Any], + smooth: bool | dict[str, Any] = False, add_cutout_windspeed: bool = False, interpolation_method: Literal["logarithmic", "power"] = "logarithmic", - **params, -) -> xr.DataArray: + **params: Any, +) -> ConvertResult: """ Generate wind generation time-series. @@ -678,6 +1021,8 @@ def wind( Parameters ---------- + cutout : atlite.Cutout + The cutout to process. turbine : str or dict A turbineconfig dictionary with the keys 'hub_height' for the hub height and 'V', 'POW' defining the power curve. @@ -698,17 +1043,29 @@ def wind( interpolation_method : {"logarithmic", "power"} Law to interpolate wind speed to turbine hub height. Refer to :py:func:`atlite.wind.extrapolate_wind_speed`. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- resource : xr.DataArray - Wind generation time-series. See :py:func:`convert_and_aggregate` - for the return value depending on the aggregation arguments. + Wind generation time-series. Without aggregation, values are capacity + factors (MWh/MWp). With aggregation and ``per_unit=False``, values are + in MW. See :py:func:`convert_and_aggregate` for details. + + See Also + -------- + pv : Photovoltaic generation. Note ---- - You can also specify all of the general conversion arguments - documented in the :py:func:`convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. + + References + ---------- + .. [1] Andresen G B, Søndergaard A A and Greiner M 2015 Energy 93, Part 1 + 1074 – 1088. doi:10.1016/j.energy.2015.09.071 Examples -------- @@ -725,20 +1082,17 @@ def wind( ('time', 'y', 'x') >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") - References - ---------- - .. [1] Andresen G B, Søndergaard A A and Greiner M 2015 Energy 93, Part 1 - 1074 – 1088. doi:10.1016/j.energy.2015.09.071 - """ - turbine = get_windturbineconfig(turbine, add_cutout_windspeed=add_cutout_windspeed) + turbine_config = get_windturbineconfig( + turbine, add_cutout_windspeed=add_cutout_windspeed + ) if smooth: - turbine = windturbine_smooth(turbine, params=smooth) + turbine_config = windturbine_smooth(turbine_config, params=smooth) return cutout.convert_and_aggregate( convert_func=convert_wind, - turbine=turbine, + turbine=turbine_config, interpolation_method=interpolation_method, **params, ) @@ -746,16 +1100,39 @@ def wind( # irradiation def convert_irradiation( - ds, - orientation, - tracking=None, - irradiation="total", - trigon_model="simple", - clearsky_model="simple", -): + ds: xr.Dataset, + orientation: Callable, + tracking: TrackingType | None = None, + irradiation: IrradiationType = "total", + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel | None = "simple", +) -> xr.DataArray: + """ + Convert weather data to irradiation on a tilted surface. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and meteorological variables. + orientation : callable + Surface orientation callback. + tracking : {None, "horizontal", "tilted_horizontal", "vertical", "dual"}, optional + Tracking mode of the surface. + irradiation : {"total", "direct", "diffuse", "ground"}, default "total" + Irradiation component to return. + trigon_model : str, default "simple" + Trigonometric irradiation model. + clearsky_model : str or None, default "simple" + Clear-sky model used for diffuse irradiation. + + Returns + ------- + xr.DataArray + Tilted surface irradiation in W/m². + """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) - irradiation = TiltedIrradiation( + return TiltedIrradiation( ds, solar_position, surface_orientation, @@ -764,23 +1141,23 @@ def convert_irradiation( tracking=tracking, irradiation=irradiation, ) - return irradiation def irradiation( - cutout, - orientation, - irradiation="total", - tracking=None, - clearsky_model=None, - **params, -): + cutout: Cutout, + orientation: OrientationName | dict[str, float] | Callable, + irradiation: IrradiationType = "total", + tracking: TrackingType | None = None, + clearsky_model: ClearskyModel | None = None, + **params: Any, +) -> ConvertResult: """ - Calculate the total, direct, diffuse, or ground irradiation on a tilted - surface. + Calculate irradiation on a tilted surface. Parameters ---------- + cutout : atlite.Cutout + The cutout to process. orientation : str, dict or callback Panel orientation can be chosen from either 'latitude_optimal', a constant orientation {'slope': 0.0, @@ -805,23 +1182,32 @@ def irradiation( model. The default choice of None will choose dependending on data availability, since the 'enhanced' model also incorporates ambient air temperature and relative humidity. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- irradiation : xr.DataArray - The desired irradiation quantity on the tilted surface. Defaults to - "total". + Irradiation on the tilted surface in W/m². + + See Also + -------- + pv : Photovoltaic generation. + solar_thermal : Solar thermal collector output. + + Notes + ----- + The ``trigon_model`` is fixed to ``'simple'`` internally. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- [1] D.T. Reindl, W.A. Beckman, and J.A. Duffie. Diffuse fraction correla- tions. Solar Energy, 45(1):1 – 7, 1990. - """ if not callable(orientation): orientation = get_orientation(orientation) @@ -838,8 +1224,36 @@ def irradiation( # solar PV def convert_pv( - ds, panel, orientation, tracking, trigon_model="simple", clearsky_model="simple" -): + ds: xr.Dataset, + panel: dict[str, Any], + orientation: Callable, + tracking: TrackingType, + trigon_model: TrigonModel = "simple", + clearsky_model: ClearskyModel | None = "simple", +) -> xr.DataArray: + """ + Convert weather data to photovoltaic specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing radiation and temperature variables. + panel : dict + Solar panel configuration. + orientation : callable + Surface orientation callback. + tracking : {None, "horizontal", "tilted_horizontal", "vertical", "dual"} + Tracking mode of the panel. + trigon_model : str, default "simple" + Trigonometric irradiation model. + clearsky_model : str or None, default "simple" + Clear-sky model used for diffuse irradiation. + + Returns + ------- + xr.DataArray + PV power output as capacity factors (unitless, 0–1). + """ solar_position = SolarPosition(ds) surface_orientation = SurfaceOrientation(ds, solar_position, orientation, tracking) irradiation = TiltedIrradiation( @@ -850,21 +1264,28 @@ def convert_pv( clearsky_model=clearsky_model, tracking=tracking, ) - solar_panel = SolarPanelModel(ds, irradiation, panel) - return solar_panel + return SolarPanelModel(ds, irradiation, panel) -def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params): +def pv( + cutout: Cutout, + panel: str | PanelConfig, + orientation: OrientationName | dict[str, float] | Callable, + tracking: TrackingType | None = None, + clearsky_model: ClearskyModel | None = None, + **params: Any, +) -> ConvertResult: """ - Convert downward-shortwave, upward-shortwave radiation flux and ambient - temperature into a pv generation time-series. + Convert radiation and temperature into PV generation time series. Parameters ---------- + cutout : atlite.Cutout + The cutout to process. panel : str or dict Panel config dictionary with the parameters for the electrical - model in [3]. Alternatively, name of yaml file stored in - atlite.config.solarpanel_dir. + model in [3]. Alternatively, a name accepted by + :py:func:`atlite.resource.get_solarpanelconfig`. orientation : str, dict or callback Panel orientation can be chosen from either 'latitude_optimal', a constant orientation {'slope': 0.0, @@ -882,30 +1303,26 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) model. The default choice of None will choose dependending on data availability, since the 'enhanced' model also incorporates ambient air temperature and relative humidity. + **params : Any + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- pv : xr.DataArray - PV generation time-series. See :py:func:`convert_and_aggregate` - for the return value depending on the aggregation arguments. - - Note - ---- - You can also specify all of the general conversion arguments - documented in the :py:func:`convert_and_aggregate` function. + PV generation time-series. Without aggregation, values are capacity + factors (unitless, 0–1). With aggregation and ``per_unit=False``, + values are in MW. See :py:func:`convert_and_aggregate` for details. - Examples + See Also -------- - Aggregate PV generation to bus regions: - - >>> pv = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... matrix=matrix, index=buses, per_unit=True) - - Get per-cell capacity factor time series (no aggregation): + wind : Wind generation. + irradiation : Tilted surface irradiation. + solar_thermal : Solar thermal collector output. - >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", - ... aggregate_time=None) - >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -920,6 +1337,19 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) the Performance Check of Grid Connected Systems, Freiburg, June 2004. Eurosun (ISES Europe Solar Congress). + + Examples + -------- + Aggregate PV generation to bus regions: + + >>> pv = cutout.pv(panel="CSi", orientation="latitude_optimal", + ... matrix=matrix, index=buses, per_unit=True) + + Get per-cell capacity factor time series (no aggregation): + + >>> cf = cutout.pv(panel="CSi", orientation="latitude_optimal", + ... aggregate_time=None) + >>> location_cf = cf.sel(x=6.9, y=53.1, method="nearest") """ if isinstance(panel, (str | Path)): panel = get_solarpanelconfig(panel) @@ -938,6 +1368,26 @@ def pv(cutout, panel, orientation, tracking=None, clearsky_model=None, **params) # solar CSP def convert_csp(ds, installation): + """ + Convert direct solar radiation to CSP specific generation. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing direct radiation variables. + installation : dict + CSP installation configuration. + + Returns + ------- + xr.DataArray + CSP output as specific yield (kWh/kW_ref), clipped to [0, 1]. + + Raises + ------ + ValueError + If the CSP technology option is not recognized. + """ solar_position = SolarPosition(ds) tech = installation["technology"] @@ -966,39 +1416,46 @@ def convert_csp(ds, installation): da = da.fillna(0.0) da.attrs["units"] = "kWh/kW_ref" - da = da.rename("specific generation") - - return da + return da.rename("specific generation") -def csp(cutout, installation, technology=None, **params): +def csp( + cutout: Cutout, + installation: str | CSPConfig, + technology: Literal["parabolic trough", "solar tower"] | None = None, + **params: Any, +) -> ConvertResult: """ - Convert downward shortwave direct radiation into a csp generation time- - series. + Convert direct radiation into CSP generation time series. Parameters ---------- - installation: str or xr.DataArray - CSP installation details determining the solar field efficiency dependent on - the local solar position. Can be either the name of one of the standard - installations provided through `atlite.cspinstallationsPanel` or an - xarray.DataArray with 'azimuth' (in rad) and 'altitude' (in rad) coordinates - and an 'efficiency' (in p.u.) entry. - technology: str - Overwrite CSP technology from the installation configuration. The technology - affects which direct radiation is considered. Either 'parabolic trough' (DHI) - or 'solar tower' (DNI). + cutout : atlite.Cutout + The cutout to process. + installation : str or xr.DataArray + CSP installation details determining the solar field efficiency + dependent on the local solar position. Can be a name accepted by + :py:func:`atlite.resource.get_cspinstallationconfig` or an + ``xr.DataArray`` with ``'azimuth'`` (rad) and ``'altitude'`` (rad) + coordinates and an ``'efficiency'`` (p.u.) entry. + technology : {"parabolic trough", "solar tower"} or None + Overwrite CSP technology from the installation configuration. + ``'parabolic trough'`` uses direct horizontal irradiance (DHI), + ``'solar tower'`` uses direct normal irradiance (DNI). + **params + Additional keyword arguments passed to `convert_and_aggregate`. Returns ------- csp : xr.DataArray - Time-series or capacity factors based on additional general - conversion arguments. + CSP generation time-series in specific yield (kWh/kW_ref), clipped + to [0, 1]. See :py:func:`convert_and_aggregate` for details on + aggregation behaviour. Note ---- - You can also specify all of the general conversion arguments - documented in the `convert_and_aggregate` function. + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. References ---------- @@ -1026,6 +1483,21 @@ def csp(cutout, installation, technology=None, **params): # hydro def convert_runoff(ds, weight_with_height=True): + """ + Convert runoff data, optionally weighting by surface height. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing ``runoff`` and, if needed, ``height``. + weight_with_height : bool, default True + Whether to weight runoff by terrain height. + + Returns + ------- + xr.DataArray + Runoff field, optionally weighted by surface height. + """ runoff = ds["runoff"] if weight_with_height: @@ -1041,12 +1513,47 @@ def runoff( normalize_using_yearly=None, **params, ): + """ + Compute aggregated surface runoff with optional smoothing and normalization. + + Parameters + ---------- + cutout : atlite.Cutout + Cutout providing weather data with runoff variables. + smooth : bool or int, optional + If ``True``, apply a rolling mean with the default window of + ``24 * 7`` time steps. If an integer, use it as the rolling window + size. Default ``None`` (no smoothing). + lower_threshold_quantile : bool or float, optional + If ``True``, use the default quantile ``5e-3``. If a float, set + values below that quantile to zero. Default ``None`` (no + thresholding). + normalize_using_yearly : pd.Series, optional + Annual country totals used to scale ``countries``-indexed results over + overlapping full years. One factor per country is derived from the + summed reference values across the overlap. + **params + Additional keyword arguments passed to ``convert_and_aggregate()``, + including ``weight_with_height`` for the underlying runoff + conversion. + + Returns + ------- + xr.DataArray or tuple[xr.DataArray, xr.DataArray] + Runoff output from ``convert_and_aggregate``. Smoothing also supports + the tuple return form used with ``return_capacity=True``. Thresholding + and normalization are only supported for ``xr.DataArray`` results. + + See Also + -------- + convert_and_aggregate : General conversion/aggregation arguments. + """ result = cutout.convert_and_aggregate(convert_func=convert_runoff, **params) if smooth is not None: if smooth is True: smooth = 24 * 7 - if "return_capacity" in params.keys(): + if "return_capacity" in params: result = result[0].rolling(time=smooth, min_periods=1).mean(), result[1] else: result = result.rolling(time=smooth, min_periods=1).mean() @@ -1067,7 +1574,8 @@ def runoff( normalize_using_yearly_i = normalize_using_yearly_i.astype(int) years = ( - pd.Series(pd.to_datetime(result.coords["time"].values).year) + pd + .Series(pd.to_datetime(result.coords["time"].values).year) .value_counts() .loc[lambda x: x > 8700] .index.intersection(normalize_using_yearly_i) @@ -1094,11 +1602,12 @@ def hydro( **kwargs, ): """ - Compute inflow time-series for `plants` by aggregating over catchment - basins from `hydrobasins` + Compute inflow time series for plants by aggregating over catchment basins. Parameters ---------- + cutout : atlite.Cutout + The cutout to process. plants : pd.DataFrame Run-of-river plants or dams with lon, lat columns. hydrobasins : str|gpd.GeoDataFrame @@ -1111,6 +1620,13 @@ def hydro( better for coarser resolution). show_progress : bool Whether to display progressbars. + **kwargs + Additional keyword arguments passed to `convert_and_aggregate`. + + Returns + ------- + xr.DataArray + Inflow time-series for each plant. References ---------- @@ -1142,7 +1658,7 @@ def hydro( # The hydrological parameters are in units of "m of water per day" and so # they should be multiplied by 1000 and the basin area to convert to m3 # d-1 = m3 h-1 / 24 - runoff *= xr.DataArray(basins.shapes.to_crs(dict(proj="cea")).area) + runoff *= xr.DataArray(basins.shapes.to_crs({"proj": "cea"}).area) return hydrom.shift_and_aggregate_runoff_for_plants( basins, runoff, flowspeed, show_progress @@ -1153,45 +1669,44 @@ def convert_line_rating( ds, psi, R, D=0.028, Ts=373, epsilon=0.6, alpha=0.6, per_unit=False ): """ - Convert the cutout data to dynamic line rating time series. + Convert weather data to dynamic line rating time series. The formulation is based on: - - [1]“IEEE Std 738™-2012 (Revision of IEEE Std 738-2006/Incorporates IEEE Std + [1] "IEEE Std 738™-2012 (Revision of IEEE Std 738-2006/Incorporates IEEE Std 738-2012/Cor 1-2013), IEEE Standard for Calculating the Current-Temperature - Relationship of Bare Overhead Conductors,” p. 72. - - The following simplifications/assumptions were made: - 1. Wind speed are taken at height 100 meters above ground. However, ironmen - and transmission lines are typically at 50-60 meters. - 2. Solar heat influx is set proportionally to solar short wave influx. - 3. The incidence angle of the solar heat influx is assumed to be 90 degree. + Relationship of Bare Overhead Conductors," p. 72. + Simplifications: + 1. Wind speed is taken at 100 m above ground, whereas transmission lines are + typically at 50-60 m. + 2. Solar heat influx is set proportional to solar short wave influx. + 3. Incidence angle of the solar heat influx is assumed to be 90°. Parameters ---------- ds : xr.Dataset - Subset of the cutout data including all weather cells overlapping with - the line. - psi : int/float - Azimuth angle of the line in degree, that is the incidence angle of the line - with a pointer directing north (90 is east, 180 is south, 270 is west). + Dataset for the cells intersecting a line. + psi : float + Line azimuth in degrees clockwise from north. R : float - Resistance of the conductor in [Ω/m] at maximally allowed temperature Ts. - D : float, - Conductor diameter. - Ts : float - Maximally allowed surface temperature (typically 100°C). - epsilon : float - Conductor emissivity. - alpha : float - Conductor absorptivity. + Conductor resistance in Ω/m at temperature *Ts*. + D : float, default 0.028 + Conductor diameter in meters. + Ts : float, default 373 + Maximum conductor surface temperature in Kelvin. + epsilon : float, default 0.6 + Conductor emissivity (dimensionless). + alpha : float, default 0.6 + Conductor absorptivity (dimensionless). + per_unit : bool, default False + Unused compatibility parameter. Returns ------- - Imax - xr.DataArray giving the maximal current capacity per timestep in Ampere. - + xr.DataArray or numpy.ndarray + Maximum current per time step in ampere. When *ds* is an + ``xr.Dataset`` the result is aggregated across intersecting cells + via ``.min("spatial")``. """ Ta = ds["temperature"] Tfilm = (Ta + Ts) / 2 @@ -1238,13 +1753,13 @@ def convert_line_rating( A = D * 1 # projected area of conductor in square meters if isinstance(ds, dict): - Position = namedtuple("solarposition", ["altitude", "azimuth"]) - solar_position = Position(ds["solar_altitude"], ds["solar_azimuth"]) + altitude = ds["solar_altitude"] + azimuth = ds["solar_azimuth"] else: - solar_position = SolarPosition(ds) - Phi_s = arccos( - cos(solar_position.altitude) * cos((solar_position.azimuth) - radians(psi)) - ) + sp = SolarPosition(ds) + altitude = sp["altitude"] + azimuth = sp["azimuth"] + Phi_s = arccos(cos(altitude) * cos(azimuth - radians(psi))) qs = alpha * Q * A * sin(Phi_s) @@ -1252,8 +1767,27 @@ def convert_line_rating( return Imax.min("spatial") if isinstance(Imax, xr.DataArray) else Imax +def line_azimuth_degrees(shape: Any) -> float: + """ + Return the line azimuth in degrees, measured clockwise from north. + + Parameters + ---------- + shape : shapely.geometry.base.BaseGeometry + Line geometry with at least two coordinates. + + Returns + ------- + float + Azimuth in degrees in the range ``(-180, 180]``. + """ + coords = np.array(shape.coords) + start, end = coords[0], coords[-1] + return float(np.degrees(np.arctan2(start[0] - end[0], start[1] - end[1]))) + + def line_rating( - cutout, shapes, line_resistance, show_progress=False, dask_kwargs={}, **params + cutout, shapes, line_resistance, show_progress=False, dask_kwargs=None, **params ): """ Create a dynamic line rating time series based on the IEEE-738 standard. @@ -1274,6 +1808,7 @@ def line_rating( Parameters ---------- cutout : atlite.Cutout + The cutout to process. shapes : geopandas.GeoSeries Line shapes of the lines. line_resistance : float/series @@ -1283,17 +1818,28 @@ def line_rating( Whether to show a progress bar. dask_kwargs : dict, default {} Dict with keyword arguments passed to `dask.compute`. - params : keyword arguments as float/series - Arguments to tweak/modify the line rating calculations based on [1]. - Defaults are: - * D : 0.028 (conductor diameter) - * Ts : 373 (maximally allowed surface temperature) - * epsilon : 0.6 (conductor emissivity) - * alpha : 0.6 (conductor absorptivity) + D : float, default 0.028 + Conductor diameter in meters. + Ts : float, default 373 + Maximum allowed conductor surface temperature in Kelvin. + epsilon : float, default 0.6 + Conductor emissivity (dimensionless). + alpha : float, default 0.6 + Conductor absorptivity (dimensionless). + **params : Any + Additional keyword arguments passed to + :py:func:`convert_line_rating`. Returns ------- - Current thermal limit timeseries with dimensions time x lines in Ampere. + xr.DataArray + Thermal current limit time-series with dimensions + ``(time, lines)`` in ampere. + + Note + ---- + This function also accepts all keyword arguments of + :py:func:`convert_and_aggregate`. Example ------- @@ -1322,6 +1868,8 @@ def line_rating( >>> s = np.sqrt(3) * i * v / 1e3 # in MW """ + if dask_kwargs is None: + dask_kwargs = {} if not isinstance(shapes, gpd.GeoSeries): shapes = gpd.GeoSeries(shapes).rename_axis("dim_0") @@ -1330,14 +1878,8 @@ def line_rating( data = cutout.data.stack(spatial=["y", "x"]) - def get_azimuth(shape): - coords = np.array(shape.coords) - start = coords[0] - end = coords[-1] - return np.arctan2(start[0] - end[0], start[1] - end[1]) - - azimuth = shapes.apply(get_azimuth) - azimuth = azimuth.where(azimuth >= 0, azimuth + np.pi) + azimuth = shapes.apply(line_azimuth_degrees) + azimuth = azimuth.where(azimuth >= 0, azimuth + 180.0) params.setdefault("D", 0.028) params.setdefault("Ts", 373) @@ -1360,8 +1902,8 @@ def get_azimuth(shape): res.append(dummy) if show_progress: with ProgressBar(minimum=2): - res = compute(res, **dask_kwargs) + (computed,) = compute(res, **dask_kwargs) else: - res = compute(res, **dask_kwargs) + (computed,) = compute(res, **dask_kwargs) - return xr.concat(*res, dim=df.index).assign_attrs(units="A") + return xr.concat(computed, dim=df.index).assign_attrs(units="A") diff --git a/atlite/csp.py b/atlite/csp.py index df23a0ad..4c24a8f4 100644 --- a/atlite/csp.py +++ b/atlite/csp.py @@ -1,21 +1,36 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for use in conjunction with csp data generation. -""" +"""Functions for use in conjunction with csp data generation.""" + +from __future__ import annotations import logging +from typing import TYPE_CHECKING, Literal, TypeAlias import numpy as np +import xarray as xr from dask.array import radians, sin from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from dask.array import Array + logger = logging.getLogger(__name__) +NDArray: TypeAlias = np.ndarray +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset +CSPTechnology = Literal["parabolic trough", "solar tower"] +FieldOrientation = Literal["horizontal", "tilted", "single-axis", "two-axis"] -def calculate_dni(ds, solar_position=None, altitude_threshold=3.75): + +def calculate_dni( + ds: Dataset, + solar_position: Dataset | None = None, + altitude_threshold: float = 3.75, +) -> DataArray: """ Calculate DNI on a perpendicular plane. @@ -24,35 +39,35 @@ def calculate_dni(ds, solar_position=None, altitude_threshold=3.75): Parameters ---------- - ds : xarray.Dataset + ds : xr.Dataset Dataset containing the direct influx (influx_direct) into a horizontal plane. - solar_position : xarray.Dataset (optional) - solar_position containing a solar 'altitude' (in rad, 0 to pi/2) for the 'ds' dataset. - Is calculated using atlite.pv.SolarPosition if omitted. - altitude_threshold : float (default: 3.75 degrees) + solar_position : xr.Dataset | None + Dataset containing solar altitude (in rad, 0 to pi/2) for the input dataset. + Calculated using atlite.pv.SolarPosition if not provided. + altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] - will be set to the altitude_threshold to avoid numerical issues when dividing by - the sine of very low solar altitude. - The default values '3.75 deg' corresponds to - the solar altitude traversed by the sun within about 15 minutes in a location with - maximum solar altitude of 60 deg and 10h day time. + are set to altitude_threshold to prevent numerical issues when dividing by + the sine of very low solar altitude (sunset / dawn). Default: 3.75 degrees + corresponds to the solar altitude traversed by the sun within about 15 minutes + in a location with 60 deg maximum solar altitude and 10 h day time. + + Returns + ------- + xr.DataArray + Direct Normal Irradiance (DNI) in W/m^2 on a plane perpendicular to solar rays. """ if solar_position is None: solar_position = SolarPosition(ds) - # solar altitude expected in rad, convert degrees (easier to specifcy) to match - altitude_threshold = radians(altitude_threshold) + altitude_threshold_rad: Array = radians(altitude_threshold) - # Sanitation of altitude values: - # Prevent high calculated DNI values during low solar altitudes (sunset / dawn) - # where sin() results in a very low denominator in the DNI calculation - altitude = solar_position["altitude"] + altitude: DataArray = solar_position["altitude"] altitude = altitude.where(lambda x: x > 0, np.nan) - altitude = altitude.where(lambda x: x > altitude_threshold, altitude_threshold) + altitude = altitude.where( + lambda x: x > altitude_threshold_rad, altitude_threshold_rad + ) - # Calculate DNI and remove NaNs introduced during altitude sanitation - # DNI is determined either by dividing by cos(azimuth) or sin(altitude) - dni = ds["influx_direct"] / sin(altitude) + dni: DataArray = ds["influx_direct"] / sin(altitude) return dni diff --git a/atlite/cutout.py b/atlite/cutout.py index db747869..1ed5451f 100644 --- a/atlite/cutout.py +++ b/atlite/cutout.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Base class for atlite. -""" +"""Base class for atlite.""" # There is a binary incompatibility between the pip wheels of netCDF4 and # rasterio, which leads to the first one to work correctly while the second @@ -14,9 +12,12 @@ # https://github.com/pydata/xarray/issues/2535, # https://github.com/rasterio/rasterio-wheels/issues/12 +from __future__ import annotations + import logging from pathlib import Path from tempfile import mktemp +from typing import TYPE_CHECKING, Any, cast from warnings import warn import geopandas as gpd @@ -28,6 +29,18 @@ from pyproj import CRS from shapely.geometry import box +if TYPE_CHECKING: + from collections.abc import Sequence + + import scipy.sparse as sp + from shapely.geometry.base import BaseGeometry + + from atlite._types import ( + CrsLike, + NDArray, + PathLike, + ) + from atlite.convert import ( coefficient_of_performance, convert_and_aggregate, @@ -66,7 +79,7 @@ class Cutout: functionalities. """ - def __init__(self, path, **cutoutparams): + def __init__(self, path: PathLike, **cutoutparams: Any) -> None: """ Provide an atlite cutout object. @@ -118,6 +131,8 @@ def __init__(self, path, **cutoutparams): data : xr.Dataset User provided cutout data. Save the cutout using `Cutout.to_file()` afterwards. + **cutoutparams + Additional keyword arguments. See Other Parameters below. Other Parameters ---------------- @@ -131,13 +146,20 @@ def __init__(self, path, **cutoutparams): Whether to interpolate NaN's in the SARAH data. This takes effect for sarah data which has missing data for areas where dawn and nightfall happens (ca. 30 min gap). - gebco_path: str + gebco_path : str Path to find the gebco NetCDF file. Only necessary when including the gebco module. parallel : bool, default False Whether to open dataset in parallel mode. Take effect for all xr.open_mfdataset usages. + Raises + ------ + TypeError + If required arguments are missing when building a new cutout. + ValueError + If ``bounds`` has an invalid format. + """ path = Path(path).with_suffix(".nc") chunks = cutoutparams.pop("chunks", {"time": 100}) @@ -155,12 +177,13 @@ def __init__(self, path, **cutoutparams): if cutoutparams: warn( f"Arguments {', '.join(cutoutparams)} are ignored, since " - "cutout is already built." + "cutout is already built.", + stacklevel=2, ) elif "data" in cutoutparams: data = cutoutparams.pop("data") else: - logger.info(f"Building new cutout {path}") + logger.info("Building new cutout %s", path) if "bounds" in cutoutparams: bounds = cutoutparams.pop("bounds") @@ -200,47 +223,38 @@ def __init__(self, path, **cutoutparams): } data = xr.Dataset(coords=coords, attrs=attrs) - # Check compatibility of CRS - modules = atleast_1d(data.attrs.get("module")) - crs = set(CRS(datamodules[m].crs) for m in modules) + module_attr = data.attrs.get("module") + assert module_attr is not None, "Cutout data missing 'module' attribute" + modules = atleast_1d(module_attr) + crs = {CRS(datamodules[m].crs) for m in modules} assert len(crs) == 1, f"CRS of {module} not compatible" self.path = path self.data = data @property - def name(self): - """ - Name of the cutout. - """ + def name(self) -> str: + """Name of the cutout.""" return self.path.stem @property - def module(self): - """ - Data module of the cutout. - """ - return self.data.attrs.get("module") + def module(self) -> str | list[str]: + """Data module of the cutout.""" + return cast("str | list[str]", self.data.attrs["module"]) @property - def crs(self): - """ - Coordinate Reference System of the cutout. - """ + def crs(self) -> CRS: + """Coordinate Reference System of the cutout.""" return CRS(datamodules[atleast_1d(self.module)[0]].crs) @property - def available_features(self): - """ - List of available weather data features for the cutout. - """ + def available_features(self) -> pd.Index: + """List of available weather data features for the cutout.""" return available_features(self.module) @property - def chunks(self): - """ - Chunking of the cutout data used by dask. - """ + def chunks(self) -> dict[str, int] | None: + """Chunking of the cutout data used by dask.""" chunks = { k.lstrip("chunksize_"): v for k, v in self.data.attrs.items() @@ -249,42 +263,35 @@ def chunks(self): return None if chunks == {} else chunks @property - def coords(self): - """ - Geographic coordinates of the cutout. - """ + def coords(self) -> xr.Coordinates: + """Geographic coordinates of the cutout.""" return self.data.coords @property - def shape(self): - """ - Size of spatial dimensions (y, x) of the cutout data. - """ + def shape(self) -> tuple[int, int]: + """Size of spatial dimensions (y, x) of the cutout data.""" return len(self.coords["y"]), len(self.coords["x"]) @property - def extent(self): - """ - Total extent of the area covered by the cutout (x, X, y, Y). - """ + def extent(self) -> NDArray: + """Total extent of the area covered by the cutout (x, X, y, Y).""" xs, ys = self.coords["x"].values, self.coords["y"].values dx, dy = self.dx, self.dy - return np.array( - [xs[0] - dx / 2, xs[-1] + dx / 2, ys[0] - dy / 2, ys[-1] + dy / 2] - ) + return np.array([ + xs[0] - dx / 2, + xs[-1] + dx / 2, + ys[0] - dy / 2, + ys[-1] + dy / 2, + ]) @property - def bounds(self): - """ - Total bounds of the area covered by the cutout (x, y, X, Y). - """ + def bounds(self) -> NDArray: + """Total bounds of the area covered by the cutout (x, y, X, Y).""" return self.extent[[0, 2, 1, 3]] @property - def transform(self): - """ - Get the affine transform of the cutout. - """ + def transform(self) -> rio.Affine: + """Get the affine transform of the cutout.""" return rio.Affine( self.dx, 0, @@ -295,10 +302,8 @@ def transform(self): ) @property - def transform_r(self): - """ - Get the affine transform of the cutout with reverse y-order. - """ + def transform_r(self) -> rio.Affine: + """Get the affine transform of the cutout with reverse y-order.""" return rio.Affine( self.dx, 0, @@ -309,42 +314,32 @@ def transform_r(self): ) @property - def dx(self): - """ - Spatial resolution on the x coordinates. - """ + def dx(self) -> float: + """Spatial resolution on the x coordinates.""" x = self.coords["x"] - return round((x[-1] - x[0]).item() / (x.size - 1), 8) + return float(round((x[-1] - x[0]).item() / (x.size - 1), 8)) @property - def dy(self): - """ - Spatial resolution on the y coordinates. - """ + def dy(self) -> float: + """Spatial resolution on the y coordinates.""" y = self.coords["y"] - return round((y[-1] - y[0]).item() / (y.size - 1), 8) + return round((y[-1] - y[0]).item() / (y.size - 1), 8) # type: ignore[no-any-return] @property - def dt(self): - """ - Time resolution of the cutout. - """ - return pd.infer_freq(self.coords["time"].to_index()) + def dt(self) -> str | None: + """Time resolution of the cutout.""" + return pd.infer_freq(self.coords["time"].to_index()) # type: ignore[no-any-return] @property - def prepared(self): - """ - Boolean indicating whether all available features are prepared. - """ - return self.prepared_features.sort_index().equals( + def prepared(self) -> bool: + """Boolean indicating whether all available features are prepared.""" + return self.prepared_features.sort_index().equals( # type: ignore[no-any-return] self.available_features.sort_index() ) @property - def prepared_features(self): - """ - Get the list of prepared features in the cutout. - """ + def prepared_features(self) -> pd.Series[Any]: + """Get the list of prepared features in the cutout.""" index = [ (self.data[v].attrs["module"], self.data[v].attrs["feature"]) for v in self.data @@ -353,7 +348,7 @@ def prepared_features(self): return pd.Series(list(self.data), index, dtype=object) @CachedAttribute - def grid(self): + def grid(self) -> gpd.GeoDataFrame: """ Cutout grid with coordinates and geometries. @@ -375,7 +370,13 @@ def grid(self): crs=self.crs, ) - def sel(self, path=None, bounds=None, buffer=0, **kwargs): + def sel( + self, + path: PathLike | None = None, + bounds: Sequence[float] | None = None, + buffer: float = 0, + **kwargs: Any, + ) -> Cutout: """ Select parts of the cutout. @@ -383,7 +384,7 @@ def sel(self, path=None, bounds=None, buffer=0, **kwargs): ---------- path : str | path-like File where to store the sub-cutout. Defaults to a temporary file. - bounds : GeoSeries.bounds | DataFrame, optional + bounds : gpd.GeoSeries.bounds | DataFrame, optional The outer bounds of the cutout or as a DataFrame containing (min.long, min.lat, max.long, max.lat). buffer : float, optional @@ -407,12 +408,15 @@ def sel(self, path=None, bounds=None, buffer=0, **kwargs): if bounds is not None: if buffer > 0: bounds = box(*bounds).buffer(buffer).bounds + assert bounds is not None x1, y1, x2, y2 = bounds kwargs.update(x=slice(x1, x2), y=slice(y1, y2)) data = self.data.sel(**kwargs) return Cutout(path, data=data) - def merge(self, other, path=None, **kwargs): + def merge( + self, other: Cutout, path: PathLike | None = None, **kwargs: Any + ) -> Cutout: """ Merge two cutouts into a single cutout. @@ -450,7 +454,7 @@ def merge(self, other, path=None, **kwargs): return Cutout(path, data=data) - def to_file(self, fn=None): + def to_file(self, fn: PathLike | None = None) -> None: """ Save cutout to a NetCDF file. @@ -464,7 +468,14 @@ def to_file(self, fn=None): fn = self.path self.data.to_netcdf(fn) - def __repr__(self): + def __repr__(self) -> str: + """Return string representation of the cutout. + + Returns + ------- + str + Human-readable summary of the cutout. + """ start = np.datetime_as_string(self.coords["time"].values[0], unit="D") end = np.datetime_as_string(self.coords["time"].values[-1], unit="D") return ( @@ -489,7 +500,9 @@ def __repr__(self): ) ) - def indicatormatrix(self, shapes, shapes_crs=4326): + def indicatormatrix( + self, shapes: Sequence[BaseGeometry], shapes_crs: CrsLike = 4326 + ) -> sp.lil_matrix | sp.csr_matrix: """ Compute the indicatormatrix. @@ -505,6 +518,9 @@ def indicatormatrix(self, shapes, shapes_crs=4326): Parameters ---------- shapes : Collection of shapely polygons + Shapes to compute the indicator matrix for. + shapes_crs : int or CRS, default 4326 + CRS of the shapes. Returns ------- @@ -514,7 +530,9 @@ def indicatormatrix(self, shapes, shapes_crs=4326): """ return compute_indicatormatrix(self.grid, shapes, self.crs, shapes_crs) - def intersectionmatrix(self, shapes, shapes_crs=4326): + def intersectionmatrix( + self, shapes: Sequence[BaseGeometry], shapes_crs: CrsLike = 4326 + ) -> sp.lil_matrix | sp.csr_matrix: """ Compute the intersectionmatrix. @@ -525,8 +543,10 @@ def intersectionmatrix(self, shapes, shapes_crs=4326): Parameters ---------- - orig : Collection of shapely polygons - dest : Collection of shapely polygons + shapes : Collection of shapely polygons + Shapes to compute the intersection matrix for. + shapes_crs : int or CRS, default 4326 + CRS of the shapes. Returns ------- @@ -536,7 +556,7 @@ def intersectionmatrix(self, shapes, shapes_crs=4326): """ return compute_intersectionmatrix(self.grid, shapes, self.crs, shapes_crs) - def area(self, crs=None): + def area(self, crs: CrsLike | None = None) -> xr.DataArray: """ Get the area per grid cell as a DataArray with coords (x,y). @@ -561,13 +581,20 @@ def area(self, crs=None): [self.coords["y"], self.coords["x"]], ) - def uniform_layout(self): + def uniform_layout(self) -> xr.DataArray: """ Get a uniform capacity layout for all grid cells. + + Returns + ------- + xr.DataArray + Layout with value 1 for all grid cells. """ return xr.DataArray(1, [self.coords["y"], self.coords["x"]]) - def uniform_density_layout(self, capacity_density, crs=None): + def uniform_density_layout( + self, capacity_density: float, crs: CrsLike | None = None + ) -> xr.DataArray: """ Get a capacity layout from a uniform capacity density. @@ -588,14 +615,18 @@ def uniform_density_layout(self, capacity_density, crs=None): """ return capacity_density * self.area(crs) - def equals(self, other): + def equals(self, other: Any) -> bool: """ - It overrides xarray.Dataset.equals and ignores the path attribute in the comparison + Compare equality with another cutout, ignoring the path attribute. + + Returns + ------- + bool + Whether the two cutouts are equal. """ if not isinstance(other, Cutout): - return NotImplemented - # Compare cutouts data attributes - return self.data.equals(other.data) + return NotImplemented # type: ignore[no-any-return] + return bool(self.data.equals(other.data)) def layout_from_capacity_list(self, data, col="Capacity"): """ @@ -632,7 +663,6 @@ def layout_from_capacity_list(self, data, col="Capacity"): >>> pv.plot() """ - x_grid = self.data.x.values y_grid = self.data.y.values diff --git a/atlite/data.py b/atlite/data.py index fece0cf6..2064a8dd 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Management of data retrieval and structure. -""" +"""Management of data retrieval and structure.""" + +from __future__ import annotations import logging import os @@ -11,33 +11,60 @@ from pathlib import Path from shutil import rmtree from tempfile import mkdtemp, mkstemp +from typing import TYPE_CHECKING, Any import pandas as pd import xarray as xr -from dask import compute, delayed +from dask import compute as dask_compute +from dask import delayed from dask.diagnostics import ProgressBar from dask.utils import SerializableLock from numpy import atleast_1d from atlite.datasets import modules as datamodules +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + + from atlite._types import DataFormat, PathLike + from atlite.cutout import Cutout + logger = logging.getLogger(__name__) def get_features( - cutout, - module, - features, - data_format, - tmpdir=None, - monthly_requests=False, - concurrent_requests=False, -): + cutout: Cutout, + module: str, + features: Iterable[str], + data_format: DataFormat, + tmpdir: PathLike | None = None, + monthly_requests: bool = False, + concurrent_requests: bool = False, +) -> xr.Dataset: """ - Load the feature data for a given module. + Load feature datasets for a cutout module. - This get the data for a set of features from a module. All modules - in `atlite.datasets` are allowed. + Parameters + ---------- + cutout : atlite.Cutout + Cutout for which data is retrieved. + module : str + Name of the dataset module. + features : iterable of str + Feature names to retrieve from the module. + data_format : str + Data format requested from the dataset backend. + tmpdir : str or pathlib.Path, optional + Directory for intermediate files. + monthly_requests : bool, optional + Whether to split requests into monthly chunks. + concurrent_requests : bool, optional + Whether to issue monthly requests concurrently. + + Returns + ------- + xarray.Dataset + Merged dataset containing the requested features. """ parameters = cutout.data.attrs lock = SerializableLock() @@ -57,9 +84,9 @@ def get_features( ) datasets.append(feature_data) - datasets = compute(*datasets) + datasets = dask_compute(*datasets) - ds = xr.merge(datasets, compat="equals") + ds: xr.Dataset = xr.merge(datasets, compat="equals") for v in ds: da = ds[v] da.attrs["module"] = module @@ -67,13 +94,12 @@ def get_features( da.attrs["feature"] = [k for k, l in fd if v in l].pop() if da.chunks is not None: - chunksizes = [c[0] for c in da.chunks] - da.encoding["chunksizes"] = chunksizes + da.encoding["chunksizes"] = [c[0] for c in da.chunks] return ds -def available_features(module=None): +def available_features(module: str | Sequence[str] | None = None) -> pd.Series[str]: """ Inspect the available features of all or a selection of modules. @@ -92,32 +118,58 @@ def available_features(module=None): """ features = {name: m.features for name, m in datamodules.items()} - features = ( - pd.DataFrame(features) + features_frame = ( + pd + .DataFrame(features) .unstack() .dropna() .rename_axis(index=["module", "feature"]) .rename("variables") ) if module is not None: - features = features.reindex(atleast_1d(module), level="module") - return features.explode() + features_frame = features_frame.reindex(atleast_1d(module), level="module") + return features_frame.explode() -def non_bool_dict(d): +def non_bool_dict(d: dict[str, Any]) -> dict[str, Any]: """ - Convert bool to int for netCDF4 storing. + Convert boolean dictionary values to integers. + + Parameters + ---------- + d : dict + Dictionary to convert. + + Returns + ------- + dict + Dictionary with boolean values replaced by ``0`` or ``1``. """ return {k: v if not isinstance(v, bool) else int(v) for k, v in d.items()} -def maybe_remove_tmpdir(func): - """Use this wrapper to make tempfile deletion compatible with windows machines.""" +def maybe_remove_tmpdir( + func: Callable[..., Any], +) -> Callable[..., Any]: + """ + Wrap a function to manage a temporary directory. + + Parameters + ---------- + func : callable + Function accepting a ``tmpdir`` keyword argument. + + Returns + ------- + callable + Wrapped function that creates and removes a temporary directory when + ``tmpdir`` is not provided. + """ @wraps(func) - def wrapper(*args, **kwargs): - if kwargs.get("tmpdir", None): - res = func(*args, **kwargs) + def wrapper(*args: Any, **kwargs: Any) -> Any: + if kwargs.get("tmpdir"): + res: Any = func(*args, **kwargs) else: kwargs["tmpdir"] = mkdtemp() try: @@ -131,17 +183,17 @@ def wrapper(*args, **kwargs): @maybe_remove_tmpdir def cutout_prepare( - cutout, - features=None, - tmpdir=None, - data_format="grib", - overwrite=False, - compression={"zlib": True, "complevel": 9, "shuffle": True}, - show_progress=False, - dask_kwargs=None, - monthly_requests=False, - concurrent_requests=False, -): + cutout: Cutout, + features: str | Sequence[str] | None = None, + tmpdir: PathLike | None = None, + data_format: DataFormat = "grib", + overwrite: bool = False, + compression: dict[str, Any] | None = None, + show_progress: bool = False, + dask_kwargs: dict[str, Any] | None = None, + monthly_requests: bool = False, + concurrent_requests: bool = False, +) -> Cutout: """ Prepare all or a selection of features in a cutout. @@ -156,6 +208,7 @@ def cutout_prepare( Parameters ---------- cutout : atlite.Cutout + The cutout to process. features : str/list, optional Feature(s) to be prepared. The default slice(None) results in all available features. @@ -195,29 +248,36 @@ def cutout_prepare( Raises ------ - NotADirectoryError - The argument `tmpdir` is not a valid path. + ValueError + If ``tmpdir`` is None. + FileNotFoundError + If ``tmpdir`` does not exist. """ if dask_kwargs is None: dask_kwargs = {} + if compression is None: + compression = {"zlib": True, "complevel": 9, "shuffle": True} + if cutout.prepared and not overwrite: logger.info("Cutout already prepared.") return cutout - # ensure that the tmpdir actually exists + if tmpdir is None: + raise ValueError("tmpdir cannot be None") temp_dir_path = Path(tmpdir) if not temp_dir_path.is_dir(): raise FileNotFoundError(f"The tmpdir: {temp_dir_path} does not exist.") - logger.info(f"Storing temporary files in {tmpdir}") + logger.info("Storing temporary files in %s", tmpdir) - modules = atleast_1d(cutout.module) - features = atleast_1d(features) if features else slice(None) + modules_list = atleast_1d(cutout.module).tolist() + features_normalized = atleast_1d(features) if features else slice(None) prepared = set(atleast_1d(cutout.data.attrs["prepared_features"])) - # target is series of all available variables for given module and features - target = available_features(modules).loc[:, features].drop_duplicates() + target = ( + available_features(modules_list).loc[:, features_normalized].drop_duplicates() + ) for module in target.index.unique("module"): missing_vars = target[module] @@ -225,39 +285,34 @@ def cutout_prepare( missing_vars = missing_vars[lambda v: ~v.isin(cutout.data)] if missing_vars.empty: continue - logger.info(f"Calculating and writing with module {module}:") + logger.info("Calculating and writing with module %s:", module) missing_features = missing_vars.index.unique("feature") ds = get_features( cutout, module, missing_features, - tmpdir=tmpdir, data_format=data_format, + tmpdir=tmpdir, monthly_requests=monthly_requests, concurrent_requests=concurrent_requests, ) prepared |= set(missing_features) - cutout.data.attrs.update(dict(prepared_features=list(prepared))) + cutout.data.attrs.update({"prepared_features": list(prepared)}) attrs = non_bool_dict(cutout.data.attrs) attrs.update(ds.attrs) - # Add optional compression to the newly prepared features if compression: for v in missing_vars: ds[v].encoding.update(compression) ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs) - # write data to tmp file, copy it to original data, this is much safer - # than appending variables directory, filename = os.path.split(str(cutout.path)) fd, tmp = mkstemp(suffix=filename, dir=directory) os.close(fd) logger.debug("Writing cutout to file...") - # Delayed writing for large cutout - # cf. https://stackoverflow.com/questions/69810367/python-how-to-write-large-netcdf-with-xarray write_job = ds.to_netcdf(tmp, compute=False) if show_progress: with ProgressBar(minimum=2): @@ -267,7 +322,7 @@ def cutout_prepare( if cutout.path.exists(): cutout.data.close() cutout.path.unlink() - os.rename(tmp, cutout.path) + Path(tmp).rename(cutout.path) cutout.data = xr.open_dataset(cutout.path, chunks=cutout.chunks) diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py index 045c59d8..69dd418d 100644 --- a/atlite/datasets/__init__.py +++ b/atlite/datasets/__init__.py @@ -2,9 +2,7 @@ # # SPDX-License-Identifier: MIT -""" -atlite datasets. -""" +"""atlite datasets.""" from atlite.datasets import era5, gebco, sarah diff --git a/atlite/datasets/cordex.py b/atlite/datasets/cordex.py index 0e95b4f2..cbf34133 100644 --- a/atlite/datasets/cordex.py +++ b/atlite/datasets/cordex.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: MIT """ -Module containing specific operations for creating cutouts from the CORDEX -dataset. +Module for creating cutouts from the CORDEX dataset. DEPRECATED ---------- @@ -12,14 +11,24 @@ for the time being! """ +from __future__ import annotations + import glob import os from itertools import groupby from operator import itemgetter +from typing import TYPE_CHECKING, Any import pandas as pd import xarray as xr +if TYPE_CHECKING: + from collections.abc import Generator + + import numpy as np + + from atlite._types import PathLike + # Model and CRS Settings model = "MPI-M-MPI-ESM-LR" @@ -29,16 +38,59 @@ # RotProj(dict(proj='ob_tran', o_proj='latlong', lon_0=180, o_lon_p=-162, o_lat_p=39.25)) -def rename_and_clean_coords(ds): +def rename_and_clean_coords(ds: xr.Dataset) -> xr.Dataset: + """Rename rotated coordinates and drop auxiliary variables. + + Parameters + ---------- + ds : xr.Dataset + CORDEX dataset with rotated lon/lat coordinates. + + Returns + ------- + xr.Dataset + Dataset with ``rlon``/``rlat`` renamed to ``x``/``y`` and + ``bnds``, ``height``, ``rotated_pole`` removed if present. + """ ds = ds.rename({"rlon": "x", "rlat": "y"}) - # drop some coordinates and variables we do not use - ds = ds.drop( + return ds.drop( (set(ds.coords) | set(ds.data_vars)) & {"bnds", "height", "rotated_pole"} ) - return ds -def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): +def prepare_data_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare time-varying CORDEX data, yielding per-month slices. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year to extract. + months : list of int + Months to extract. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the corresponding monthly dataset slice. + """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -60,7 +112,39 @@ def prepare_data_cordex(fn, year, months, oldname, newname, xs, ys): yield (year, m), ds.sel(time=f"{year}-{m}") -def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): +def prepare_static_data_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare static (time-invariant) CORDEX data. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year key for the yielded tuples. + months : list of int + Months to yield entries for. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the static dataset (same for each month). + """ with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.rename({oldname: newname}) @@ -70,7 +154,39 @@ def prepare_static_data_cordex(fn, year, months, oldname, newname, xs, ys): yield (year, m), ds -def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): +def prepare_weather_types_cordex( + fn: PathLike, + year: int, + months: list[int], + oldname: str, + newname: str, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Load and prepare CORDEX weather type classification data. + + Parameters + ---------- + fn : PathLike + Path to the NetCDF file. + year : int + Year to extract. + months : list of int + Months to extract. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + xs : slice or np.ndarray + Unused, kept for interface consistency. + ys : slice or np.ndarray + Unused, kept for interface consistency. + + Yields + ------ + tuple of ((int, int), xr.Dataset) + ``(year, month)`` key and the corresponding monthly dataset slice. + """ with xr.open_dataset(fn) as ds: ds = ds.rename({oldname: newname}) for m in months: @@ -78,9 +194,42 @@ def prepare_weather_types_cordex(fn, year, months, oldname, newname, xs, ys): def prepare_meta_cordex( - xs, ys, year, month, template, height_config, module, model=model -): - fn = next(glob.iglob(template.format(year=year, model=model))) + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + year: int, + month: int, + template: str, + height_config: dict[str, Any], + module: Any, + model: str = "MPI-M-MPI-ESM-LR", +) -> xr.Dataset: + """Build metadata dataset for a CORDEX cutout including height. + + Parameters + ---------- + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + year : int + Reference year. + month : int + Reference month. + template : str + Glob template for locating NetCDF files. + height_config : dict + Configuration for height data retrieval. + module : Any + Dataset module reference. + model : str, optional + Climate model identifier. + + Returns + ------- + xr.Dataset + Coordinate metadata dataset with height variable. + """ + fn = next(glob.iglob(template.format(year=year, model=model))) # noqa: PTH207 with xr.open_dataset(fn) as ds: ds = rename_and_clean_coords(ds) ds = ds.coords.to_dataset() @@ -103,150 +252,141 @@ def prepare_meta_cordex( def tasks_yearly_cordex( - xs, ys, yearmonths, prepare_func, template, oldname, newname, meta_attrs -): + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + oldname: str, + newname: str, + meta_attrs: dict[str, Any], +) -> list[dict[str, Any]]: + """Create yearly preparation task dicts for CORDEX data retrieval. + + Parameters + ---------- + xs : slice or np.ndarray + Spatial selection along x. + ys : slice or np.ndarray + Spatial selection along y. + yearmonths : list of (int, int) + ``(year, month)`` pairs to process. + prepare_func : callable + Function to call for data preparation. + template : str + Glob template for locating NetCDF files. + oldname : str + Original variable name in the dataset. + newname : str + Target variable name after renaming. + meta_attrs : dict + Cutout metadata attributes; must contain ``"model"`` key. + + Returns + ------- + list of dict + One task dict per year with keys needed by ``prepare_func``. + """ model = meta_attrs["model"] if not isinstance(xs, slice): - first, second, last = xs.values[[0, 1, -1]] + first, second, last = xs[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) if not isinstance(ys, slice): - first, second, last = ys.values[[0, 1, -1]] + first, second, last = ys[[0, 1, -1]] ys = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) return [ - dict( - prepare_func=prepare_func, - xs=xs, - ys=ys, - oldname=oldname, - newname=newname, - fn=next(glob.iglob(template.format(year=year, model=model))), - year=year, - months=list(map(itemgetter(1), yearmonths)), - ) + { + "prepare_func": prepare_func, + "xs": xs, + "ys": ys, + "oldname": oldname, + "newname": newname, + "fn": next(glob.iglob(template.format(year=year, model=model))), # noqa: PTH207 + "year": year, + "months": list(map(itemgetter(1), yearmonths)), + } for year, yearmonths in groupby(yearmonths, itemgetter(0)) ] -weather_data_config = { - "influx": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsds", - newname="influx", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "influx", - "rsds_*_{year}*.nc", - ), - ), - "outflux": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="rsus", - newname="outflux", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "outflux", - "rsus_*_{year}*.nc", - ), - ), - "temperature": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="tas", - newname="temperature", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "temperature", - "tas_*_{year}*.nc", - ), - ), - "humidity": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="hurs", - newname="humidity", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "humidity", - "hurs_*_{year}*.nc", - ), - ), - "wnd10m": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="sfcWind", - newname="wnd10m", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "wind", - "sfcWind_*_{year}*.nc", - ), - ), - "roughness": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="rlst", - newname="roughness", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "roughness", - "rlst_*.nc", - ), - ), - "runoff": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_data_cordex, - oldname="mrro", - newname="runoff", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "runoff", - "mrro_*_{year}*.nc", +cordex_dir = os.environ["ATLITE_CORDEX_DIR"] + +weather_data_config: dict[str, dict[str, Any]] = { + "influx": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsds", + "newname": "influx", + "template": os.path.join(cordex_dir, "{model}", "influx", "rsds_*_{year}*.nc"), + }, + "outflux": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "rsus", + "newname": "outflux", + "template": os.path.join(cordex_dir, "{model}", "outflux", "rsus_*_{year}*.nc"), + }, + "temperature": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "tas", + "newname": "temperature", + "template": os.path.join( + cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc" ), - ), - "height": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_static_data_cordex, - oldname="orog", - newname="height", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "altitude", - "orog_*.nc", + }, + "humidity": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "hurs", + "newname": "humidity", + "template": os.path.join( + cordex_dir, "{model}", "humidity", "hurs_*_{year}*.nc" ), - ), - "CWT": dict( - tasks_func=tasks_yearly_cordex, - prepare_func=prepare_weather_types_cordex, - oldname="CWT", - newname="CWT", - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "weather_types", - "CWT_*_{year}*.nc", + }, + "wnd10m": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "sfcWind", + "newname": "wnd10m", + "template": os.path.join(cordex_dir, "{model}", "wind", "sfcWind_*_{year}*.nc"), + }, + "roughness": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "rlst", + "newname": "roughness", + "template": os.path.join(cordex_dir, "{model}", "roughness", "rlst_*.nc"), + }, + "runoff": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_data_cordex, + "oldname": "mrro", + "newname": "runoff", + "template": os.path.join(cordex_dir, "{model}", "runoff", "mrro_*_{year}*.nc"), + }, + "height": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_static_data_cordex, + "oldname": "orog", + "newname": "height", + "template": os.path.join(cordex_dir, "{model}", "altitude", "orog_*.nc"), + }, + "CWT": { + "tasks_func": tasks_yearly_cordex, + "prepare_func": prepare_weather_types_cordex, + "oldname": "CWT", + "newname": "CWT", + "template": os.path.join( + cordex_dir, "{model}", "weather_types", "CWT_*_{year}*.nc" ), - ), + }, } -meta_data_config = dict( - prepare_func=prepare_meta_cordex, - template=os.path.join( - config.cordex_dir, # noqa: F821 - "{model}", - "temperature", - "tas_*_{year}*.nc", - ), - height_config=weather_data_config["height"], -) +meta_data_config: dict[str, Any] = { + "prepare_func": prepare_meta_cordex, + "template": os.path.join(cordex_dir, "{model}", "temperature", "tas_*_{year}*.nc"), + "height_config": weather_data_config["height"], +} diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index f2f957a7..adafdb3b 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -8,12 +8,15 @@ https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation """ +from __future__ import annotations + import logging import os import warnings import weakref from pathlib import Path from tempfile import mkstemp +from typing import TYPE_CHECKING, Any, Literal, cast import cdsapi import numpy as np @@ -21,12 +24,19 @@ import xarray as xr from dask import compute, delayed from dask.array import arctan2, sqrt -from dask.utils import SerializableLock from numpy import atleast_1d from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from collections.abc import Callable + from contextlib import AbstractContextManager + + from dask.utils import SerializableLock + + from atlite._types import PathLike + # Null context for running a with statements wihout any context try: from contextlib import nullcontext @@ -34,8 +44,8 @@ # for Python verions < 3.7: import contextlib - @contextlib.contextmanager - def nullcontext(): + @contextlib.contextmanager # type: ignore[no-redef] + def nullcontext(): # noqa: D103 yield @@ -62,48 +72,79 @@ def nullcontext(): static_features = {"height"} -def _add_height(ds): +def _add_height(ds: xr.Dataset) -> xr.Dataset: """ - Convert geopotential 'z' to geopotential height following [1]. + Convert geopotential to height and replace the 'z' variable. + + Parameters + ---------- + ds : xr.Dataset + Dataset containing geopotential variable 'z'. + + Returns + ------- + xr.Dataset + Dataset with 'height' variable in meters, 'z' removed. References ---------- [1] ERA5: surface elevation and orography, retrieved: 10.02.2019 https://confluence.ecmwf.int/display/CKB/ERA5%3A+surface+elevation+and+orography - """ g0 = 9.80665 z = ds["z"] if "time" in z.coords: z = z.isel(time=0, drop=True) ds["height"] = z / g0 - ds = ds.drop_vars("z") - return ds + return ds.drop_vars("z") -def _rename_and_clean_coords(ds, add_lon_lat=True): +def _rename_and_clean_coords(ds: xr.Dataset, add_lon_lat: bool = True) -> xr.Dataset: """ - Rename 'longitude' and 'latitude' columns to 'x' and 'y' and fix roundings. + Standardize coordinate names and clean up auxiliary variables. + + Renames longitude/latitude/valid_time to x/y/time, rounds spatial + coordinates, and drops 'expver'/'number' if present. + + Parameters + ---------- + ds : xr.Dataset + Raw ERA5 dataset with original coordinate names. + add_lon_lat : bool, optional + Whether to add 'lon'/'lat' as coordinate aliases. Default True. - Optionally (add_lon_lat, default:True) preserves latitude and - longitude columns as 'lat' and 'lon'. + Returns + ------- + xr.Dataset + Dataset with standardized coordinates. """ ds = ds.rename({"longitude": "x", "latitude": "y", "valid_time": "time"}) # round coords since cds coords are float32 which would lead to mismatches ds = ds.assign_coords( x=np.round(ds.x.astype(float), 5), y=np.round(ds.y.astype(float), 5) ) - ds = maybe_swap_spatial_dims(ds) + ds = cast("xr.Dataset", maybe_swap_spatial_dims(ds)) if add_lon_lat: ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - ds = ds.drop_vars(["expver", "number"], errors="ignore") - - return ds + return ds.drop_vars(["expver", "number"], errors="ignore") -def get_data_wind(retrieval_params): +def get_data_wind(retrieval_params: dict[str, Any]) -> xr.Dataset: """ - Get wind data for given retrieval parameters. + Retrieve and compute wind speed variables from ERA5. + + Downloads u/v wind components at 10m and 100m, computes wind speed, + shear exponent, azimuth angle, and surface roughness. + + Parameters + ---------- + retrieval_params : dict[str, Any] + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: wnd100m, wnd_shear_exp, wnd_azimuth, roughness. """ ds = retrieve_data( variable=[ @@ -130,22 +171,44 @@ def get_data_wind(retrieval_params): ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"]) - ds = ds.rename({"fsr": "roughness"}) - - return ds + return ds.rename({"fsr": "roughness"}) -def sanitize_wind(ds): +def sanitize_wind(ds: xr.Dataset) -> xr.Dataset: """ - Sanitize retrieved wind data. + Clip negative roughness values to a minimum of 2e-4. + + Parameters + ---------- + ds : xr.Dataset + Wind dataset containing 'roughness' variable. + + Returns + ------- + xr.Dataset + Dataset with corrected roughness values. """ ds["roughness"] = ds["roughness"].where(ds["roughness"] >= 0.0, 2e-4) return ds -def get_data_influx(retrieval_params): +def get_data_influx(retrieval_params: dict[str, Any]) -> xr.Dataset: """ - Get influx data for given retrieval parameters. + Retrieve and compute solar radiation variables from ERA5. + + Downloads radiation components, converts from J/m² to W/m², computes + albedo, diffuse radiation, and solar position (altitude/azimuth). + + Parameters + ---------- + retrieval_params : dict[str, Any] + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: influx_toa, influx_direct, influx_diffuse, + albedo, solar_altitude, solar_azimuth. """ ds = retrieve_data( variable=[ @@ -175,33 +238,54 @@ def get_data_influx(retrieval_params): ds[a] = ds[a] / (60.0 * 60.0) ds[a].attrs["units"] = "W m**-2" - # ERA5 variables are mean values for previous hour, i.e. 13:01 to 14:00 are labelled as "14:00" - # account by calculating the SolarPosition for the center of the interval for aggregation happens - # see https://github.com/PyPSA/atlite/issues/158 - # Do not show DeprecationWarning from new SolarPosition calculation (#199) + # ERA5 variables are mean values for previous hour, i.e. 13:01 to 14:00 + # are labelled as "14:00". Account by calculating the SolarPosition for the + # center of the interval for aggregation. + # See https://github.com/PyPSA/atlite/issues/158 + # Suppress DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) time_shift = pd.to_timedelta("-30 minutes") sp = SolarPosition(ds, time_shift=time_shift) sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - ds = xr.merge([ds, sp]) - - return ds + return xr.merge([ds, sp]) -def sanitize_influx(ds): +def sanitize_influx(ds: xr.Dataset) -> xr.Dataset: """ - Sanitize retrieved influx data. + Clip negative radiation values to zero. + + Parameters + ---------- + ds : xr.Dataset + Influx dataset with influx_direct, influx_diffuse, influx_toa. + + Returns + ------- + xr.Dataset + Dataset with non-negative radiation values. """ for a in ("influx_direct", "influx_diffuse", "influx_toa"): ds[a] = ds[a].clip(min=0.0) return ds -def get_data_temperature(retrieval_params): +def get_data_temperature(retrieval_params: dict[str, Any]) -> xr.Dataset: """ - Get wind temperature for given retrieval parameters. + Retrieve temperature variables from ERA5. + + Downloads 2m temperature, soil temperature (level 4), and 2m dewpoint. + + Parameters + ---------- + retrieval_params : dict[str, Any] + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with variables: temperature, soil temperature, dewpoint temperature. """ ds = retrieve_data( variable=[ @@ -213,79 +297,115 @@ def get_data_temperature(retrieval_params): ) ds = _rename_and_clean_coords(ds) - ds = ds.rename( - { - "t2m": "temperature", - "stl4": "soil temperature", - "d2m": "dewpoint temperature", - } - ) - - return ds + return ds.rename({ + "t2m": "temperature", + "stl4": "soil temperature", + "d2m": "dewpoint temperature", + }) -def get_data_runoff(retrieval_params): +def get_data_runoff(retrieval_params: dict[str, Any]) -> xr.Dataset: """ - Get runoff data for given retrieval parameters. + Retrieve runoff data from ERA5. + + Parameters + ---------- + retrieval_params : dict[str, Any] + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with 'runoff' variable. """ ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = ds.rename({"ro": "runoff"}) - - return ds + return ds.rename({"ro": "runoff"}) -def sanitize_runoff(ds): +def sanitize_runoff(ds: xr.Dataset) -> xr.Dataset: """ - Sanitize retrieved runoff data. + Clip negative runoff values to zero. + + Parameters + ---------- + ds : xr.Dataset + Runoff dataset containing 'runoff' variable. + + Returns + ------- + xr.Dataset + Dataset with non-negative runoff values. """ ds["runoff"] = ds["runoff"].clip(min=0.0) return ds -def get_data_height(retrieval_params): +def get_data_height(retrieval_params: dict[str, Any]) -> xr.Dataset: """ - Get height data for given retrieval parameters. + Retrieve geopotential and convert to terrain height. + + Parameters + ---------- + retrieval_params : dict[str, Any] + CDS API retrieval parameters including area, time, and format. + + Returns + ------- + xr.Dataset + Dataset with 'height' variable in meters. """ ds = retrieve_data(variable="geopotential", **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = _add_height(ds) + return _add_height(ds) - return ds +def _area(coords: dict[str, xr.DataArray]) -> list[float]: + """ + Extract CDS API bounding box from coordinates. + + Parameters + ---------- + coords : dict[str, xr.DataArray] + Coordinate arrays with 'x' (longitude) and 'y' (latitude). -def _area(coords): - # North, West, South, East. Default: global + Returns + ------- + list[float] + Bounding box as [north, west, south, east]. + """ x0, x1 = coords["x"].min().item(), coords["x"].max().item() y0, y1 = coords["y"].min().item(), coords["y"].max().item() return [y1, x0, y0, x1] -def retrieval_times(coords, static=False, monthly_requests=False): +def retrieval_times( + coords: dict[str, xr.DataArray], + static: bool = False, + monthly_requests: bool = False, +) -> dict[str, Any] | list[dict[str, Any]]: """ - Get list of retrieval cdsapi arguments for time dimension in coordinates. + Generate time parameter chunks for CDS API requests. - If static is False, this function creates a query for each month and year - in the time axis in coords. This ensures not running into size query limits - of the cdsapi even with very (spatially) large cutouts. - If static is True, the function return only one set of parameters - for the very first time point. + Splits the time coordinate into year-based (or year-month-based) chunks + suitable for the CDS API query format. Parameters ---------- - coords : atlite.Cutout.coords + coords : dict[str, xr.DataArray] + Coordinate arrays with 'time' dimension. static : bool, optional + If True, return a single time point (for time-invariant fields). monthly_requests : bool, optional - If True, the data is requested on a monthly basis. This is useful for - large cutouts, where the data is requested in smaller chunks. The - default is False + If True, split requests by month within each year. Returns ------- - list of dicts witht retrieval arguments - + dict[str, Any] or list[dict[str, Any]] + Single dict if static, otherwise list of dicts with + 'year', 'month', 'day', 'time' keys. """ time = coords["time"].to_index() if static: @@ -296,8 +416,7 @@ def retrieval_times(coords, static=False, monthly_requests=False): "time": time[0].strftime("%H:00"), } - # Prepare request for all months and years - times = [] + times: list[dict[str, Any]] = [] for year in time.year.unique(): t = time[time.year == year] if monthly_requests: @@ -320,26 +439,63 @@ def retrieval_times(coords, static=False, monthly_requests=False): return times -def noisy_unlink(path): +def noisy_unlink(path: PathLike) -> None: """ - Delete file at given path. + Remove a file with debug logging, handling PermissionError gracefully. + + Parameters + ---------- + path : PathLike + Path to the file to delete. """ - logger.debug(f"Deleting file {path}") + logger.debug("Deleting file %s", path) try: - os.unlink(path) + Path(path).unlink() except PermissionError: - logger.error(f"Unable to delete file {path}, as it is still in use.") + logger.error("Unable to delete file %s, as it is still in use.", path) + +def add_finalizer(ds: xr.Dataset, target: PathLike) -> None: + """ + Register a weak-reference callback to delete a temp file on garbage collection. + + Parameters + ---------- + ds : xr.Dataset + Dataset whose lifetime controls the temp file. + target : PathLike + Path to the temporary file to clean up. + """ + logger.debug("Adding finalizer for %s", target) + assert ds._close is not None + weakref.finalize(cast("Any", ds._close).__self__.ds, noisy_unlink, target) + + +def sanitize_chunks(chunks: Any, **dim_mapping: str) -> Any: + """ + Remap internal dimension names to ERA5/CDS dimension names in chunk specs. -def add_finalizer(ds: xr.Dataset, target: str | Path): - logger.debug(f"Adding finalizer for {target}") - weakref.finalize(ds._close.__self__.ds, noisy_unlink, target) + Translates atlite dimension names (time, x, y) to the corresponding + ERA5 names (valid_time, longitude, latitude). + Parameters + ---------- + chunks : Any + Chunk specification. If not a dict, returned as-is. + **dim_mapping : str + Additional or override dimension name mappings. -def sanitize_chunks(chunks, **dim_mapping): - dim_mapping = dict(time="valid_time", x="longitude", y="latitude") | dim_mapping + Returns + ------- + Any + Remapped chunk dict, or original value if not a dict. + """ + dim_mapping = { + "time": "valid_time", + "x": "longitude", + "y": "latitude", + } | dim_mapping if not isinstance(chunks, dict): - # preserve "auto" or None return chunks return { @@ -350,40 +506,40 @@ def sanitize_chunks(chunks, **dim_mapping): def open_with_grib_conventions( - grib_file: str | Path, chunks=None, tmpdir: str | Path | None = None + grib_file: PathLike, + chunks: dict[str, int] | None = None, + tmpdir: PathLike | None = None, ) -> xr.Dataset: """ - Convert grib file of ERA5 data from the CDS to netcdf file. + Open a GRIB file using cfgrib with standardized coordinate conventions. - The function does the same thing as the CDS backend does, but locally. - This is needed, as the grib file is the recommended download file type for CDS, with conversion to netcdf locally. - The routine is a reduced version based on the documentation here: - https://confluence.ecmwf.int/display/CKB/GRIB+to+netCDF+conversion+on+new+CDS+and+ADS+systems#GRIBtonetCDFconversiononnewCDSandADSsystems-jupiternotebook + Performs the same conversion as the CDS backend, but locally. + Based on the documentation at + https://confluence.ecmwf.int/display/CKB/GRIB+to+netCDF+conversion+on+new+CDS+and+ADS+systems Parameters ---------- - grib_file : str | Path - Path to the grib file to be converted. - chunks - Chunks - tmpdir : Path, optional - If None adds a finalizer to the dataset object + grib_file : PathLike + Path to the GRIB file. + chunks : dict[str, int] or None, optional + Dask chunk specification for lazy loading. + tmpdir : PathLike or None, optional + If set, the file is kept (managed externally). Returns ------- xr.Dataset + Opened dataset with standardized dimensions. """ - # - # Open grib file as dataset - # Options to open different datasets into a datasets of consistent hypercubes which are compatible netCDF - # There are options that might be relevant for e.g. for wave model data, that have been removed here - # to keep the code cleaner and shorter + # Open grib file as dataset. + # Options below normalize different ERA5 grib variants into consistent + # netCDF-compatible hypercubes. Options relevant only to e.g. wave-model + # data have been removed to keep this routine focused on the products we use. ds = xr.open_dataset( grib_file, engine="cfgrib", time_dims=["valid_time"], ignore_keys=["edition"], - # extra_coords={"expver": "valid_time"}, coords_as_attributes=[ "surface", "depthBelowLandLayer", @@ -397,9 +553,12 @@ def open_with_grib_conventions( add_finalizer(ds, grib_file) def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Dataset: - """ - Expand dimensions in an xarray dataset, ensuring that the new dimensions are not already in the dataset - and that the order of dimensions is preserved. + """Expand missing dimensions while preserving their original order. + + Returns + ------- + xr.Dataset + Dataset with the requested dimensions present. """ dims_required = [ c for c in dataset.coords if c in expand_dims + list(dataset.dims) @@ -413,7 +572,6 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase return dataset logger.debug("Converting grib file to netcdf format") - # Variables and dimensions to rename if they exist in the dataset rename_vars = { "time": "forecast_reference_time", "step": "forecast_period", @@ -423,7 +581,6 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase rename_vars = {k: v for k, v in rename_vars.items() if k in ds} ds = ds.rename(rename_vars) - # safely expand dimensions in an xarray dataset to ensure that data for the new dimensions are in the dataset ds = safely_expand_dims(ds, ["valid_time", "pressure_level", "model_level"]) return ds @@ -432,37 +589,35 @@ def safely_expand_dims(dataset: xr.Dataset, expand_dims: list[str]) -> xr.Datase def retrieve_data( product: str, chunks: dict[str, int] | None = None, - tmpdir: str | Path | None = None, + tmpdir: PathLike | None = None, lock: SerializableLock | None = None, - **updates, + **updates: Any, ) -> xr.Dataset: """ - Download data like ERA5 from the Climate Data Store (CDS). + Download ERA5 data from the CDS API and return as an xarray Dataset. - If you want to track the state of your request go to - https://cds-beta.climate.copernicus.eu/requests?tab=all + The ongoing and past requests can be tracked at + https://cds-beta.climate.copernicus.eu/requests?tab=all. Parameters ---------- product : str - Product name, e.g. 'reanalysis-era5-single-levels'. - chunks : dict, optional - Chunking for xarray dataset, e.g. {'time': 1, 'x': 100, 'y': 100}. - Default is None. - tmpdir : str, optional - Directory where the downloaded data is temporarily stored. - Default is None, which uses the system's temporary directory. - lock : dask.utils.SerializableLock, optional - Lock for thread-safe file writing. Default is None. - updates : dict - Additional parameters for the request. - Must include 'year', 'month', and 'variable'. - Can include e.g. 'data_format'. + CDS product name (e.g. 'reanalysis-era5-single-levels'). + chunks : dict[str, int] or None, optional + Dask chunk specification for lazy loading. + tmpdir : PathLike or None, optional + Directory for temporary download files. If None, files are + cleaned up via finalizer on GC. + lock : SerializableLock or None, optional + Lock for thread-safe file creation. + **updates : Any + Additional CDS API request parameters. Must include at least + 'variable', 'year', and 'month'. Returns ------- - xarray.Dataset - Dataset with the retrieved variables. + xr.Dataset + Downloaded ERA5 data. Examples -------- @@ -474,36 +629,37 @@ def retrieve_data( ... year='2020', ... month='01', ... variable=['10m_u_component_of_wind', '10m_v_component_of_wind'], - ... data_format='netcdf' + ... data_format='netcdf', ... ) """ - request = {"product_type": ["reanalysis"], "download_format": "unarchived"} + request: dict[str, Any] = { + "product_type": ["reanalysis"], + "download_format": "unarchived", + } request.update(updates) assert {"year", "month", "variable"}.issubset(request), ( "Need to specify at least 'variable', 'year' and 'month'" ) - logger.debug(f"Requesting {product} with API request: {request}") + logger.debug("Requesting %s with API request: %s", product, request) client = cdsapi.Client( - info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level + info_callback=logger.debug, debug=logging.root.level <= logging.DEBUG ) result = client.retrieve(product, request) - if lock is None: - lock = nullcontext() + cm: AbstractContextManager = nullcontext() if lock is None else lock - suffix = f".{request['data_format']}" # .netcdf or .grib - with lock: + suffix = f".{request['data_format']}" + with cm: fd, target = mkstemp(suffix=suffix, dir=tmpdir) os.close(fd) - # Inform user about data being downloaded as "* variable (year-month)" timestr = f"{request['year']}-{request['month']}" variables = atleast_1d(request["variable"]) varstr = "\n\t".join([f"{v} ({timestr})" for v in variables]) - logger.info(f"CDS: Downloading variables\n\t{varstr}\n") + logger.info("CDS: Downloading variables\n\t%s\n", varstr) result.download(target) # Convert from grib to netcdf locally, same conversion as in CDS backend @@ -512,60 +668,59 @@ def retrieve_data( else: ds = xr.open_dataset(target, chunks=sanitize_chunks(chunks)) if tmpdir is None: - add_finalizer(target) + add_finalizer(ds, target) return ds def get_data( - cutout, - feature, - tmpdir, - lock=None, - data_format="grib", - monthly_requests=False, - concurrent_requests=False, - **creation_parameters, -): + cutout: Any, + feature: str, + tmpdir: PathLike, + lock: SerializableLock | None = None, + data_format: Literal["grib", "netcdf"] = "grib", + monthly_requests: bool = False, + concurrent_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: """ - Retrieve data from ECMWFs ERA5 dataset (via CDS). + Download ERA5 data for a given feature. - This front-end function downloads data for a specific feature and formats - it to match the given Cutout. + Dispatches to feature-specific ``get_data_{feature}`` functions, + optionally applies ``sanitize_{feature}``, and concatenates time chunks. Parameters ---------- - cutout : atlite.Cutout + cutout : Cutout + Cutout object defining the spatial and temporal extent. feature : str - Name of the feature data to retrieve. Must be in - `atlite.datasets.era5.features` - tmpdir : str/Path - Directory where the temporary netcdf files are stored. + Feature to retrieve (e.g. 'wind', 'influx', 'temperature', + 'runoff', 'height'). + tmpdir : PathLike + Directory for temporary download files. + lock : SerializableLock or None, optional + Lock for thread-safe file creation. + data_format : {{'grib', 'netcdf'}}, optional + Download format. Default 'grib'; ``grib`` is recommended over + ``netcdf`` because the CDSAPI limits request size for the latter. monthly_requests : bool, optional - If True, the data is requested on a monthly basis in ERA5. This is useful for - large cutouts, where the data is requested in smaller chunks. The - default is False - data_format : str, optional - The format of the data to be downloaded. Can be either 'grib' or 'netcdf', - 'grib' highly recommended because CDSAPI limits request size for netcdf. + If True, split API requests by month. Default False. concurrent_requests : bool, optional - If True, the monthly data requests are posted concurrently. - Only has an effect if `monthly_requests` is True. - **creation_parameters : - Additional keyword arguments. The only effective argument is 'sanitize' - (default True) which sets sanitization of the data on or off. + If True, use dask.delayed for parallel downloads. Default False. + **creation_parameters : Any + Additional parameters; 'sanitize' (bool, default True) controls + whether post-processing is applied. Returns ------- - xarray.Dataset - Dataset of dask arrays of the retrieved variables. - + xr.Dataset + ERA5 data for the requested feature, aligned to cutout coordinates. """ coords = cutout.coords sanitize = creation_parameters.get("sanitize", True) - retrieval_params = { + retrieval_params: dict[str, Any] = { "product": "reanalysis-era5-single-levels", "area": _area(coords), "chunks": cutout.chunks, @@ -575,25 +730,34 @@ def get_data( "data_format": data_format, } - func = globals().get(f"get_data_{feature}") - sanitize_func = globals().get(f"sanitize_{feature}") + func: Callable[[dict[str, Any]], xr.Dataset] | None = globals().get( + f"get_data_{feature}" + ) + sanitize_func: Callable[[xr.Dataset], xr.Dataset] | None = globals().get( + f"sanitize_{feature}" + ) - logger.info(f"Requesting data for feature {feature}...") + logger.info("Requesting data for feature %s...", feature) - def retrieve_once(time): - ds = func({**retrieval_params, **time}) + def retrieve_once(time: dict[str, Any]) -> xr.Dataset: + ds = func({**retrieval_params, **time}) # type: ignore[misc] if sanitize and sanitize_func is not None: ds = sanitize_func(ds) return ds if feature in static_features: - return retrieve_once(retrieval_times(coords, static=True)).squeeze() + static_times = retrieval_times(coords, static=True) + assert isinstance(static_times, dict) + return retrieve_once(static_times).squeeze() time_chunks = retrieval_times(coords, monthly_requests=monthly_requests) + assert isinstance(time_chunks, list) if concurrent_requests: delayed_datasets = [delayed(retrieve_once)(chunk) for chunk in time_chunks] datasets = compute(*delayed_datasets) else: datasets = map(retrieve_once, time_chunks) - return xr.concat(datasets, dim="time").sel(time=coords["time"]) + result = xr.concat(datasets, dim="time").sel(time=coords["time"]) + assert isinstance(result, xr.Dataset) + return result diff --git a/atlite/datasets/gebco.py b/atlite/datasets/gebco.py index 948e862c..017f2b05 100755 --- a/atlite/datasets/gebco.py +++ b/atlite/datasets/gebco.py @@ -3,24 +3,46 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module for loading gebco data. -""" +"""Module for loading gebco data.""" + +from __future__ import annotations import logging +from typing import TYPE_CHECKING, Any import rasterio as rio import xarray as xr from pandas import to_numeric from rasterio.warp import Resampling +if TYPE_CHECKING: + from atlite._types import PathLike + logger = logging.getLogger(__name__) crs = 4326 features = {"height": ["height"]} -def get_data_gebco_height(xs, ys, gebco_path): +def get_data_gebco_height( + xs: xr.DataArray, ys: xr.DataArray, gebco_path: PathLike +) -> xr.DataArray: + """Read and resample GEBCO bathymetry/elevation to the target grid. + + Parameters + ---------- + xs : xr.DataArray + Target x (longitude) coordinates. + ys : xr.DataArray + Target y (latitude) coordinates. + gebco_path : PathLike + Path to the GEBCO GeoTIFF or NetCDF file. + + Returns + ------- + xr.DataArray + Height data resampled to the target grid with dimensions ``(y, x)``. + """ x, X = xs.data[[0, -1]] y, Y = ys.data[[0, -1]] @@ -45,41 +67,40 @@ def get_data_gebco_height(xs, ys, gebco_path): def get_data( - cutout, - feature, - tmpdir, - monthly_requests=False, - concurrent_requests=False, - **creation_parameters, -): - """ - Get the gebco height data. + cutout: Any, + feature: str, + tmpdir: PathLike, + monthly_requests: bool = False, + concurrent_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: + """Retrieve GEBCO height data for a cutout. Parameters ---------- - cutout : atlite.Cutout + cutout : Any + Cutout instance providing target coordinates. feature : str - Takes no effect, only here for consistency with other dataset modules. - tmpdir : str - Takes no effect, only here for consistency with other dataset modules. - monthly_requests : bool - Takes no effect, only here for consistency with other dataset modules. - concurrent_requests : bool - Takes no effect, only here for consistency with other dataset modules. - **creation_parameters : - Must include `gebco_path`. + Feature name (expected ``"height"``). + tmpdir : PathLike + Temporary directory (unused, kept for interface consistency). + monthly_requests : bool, optional + Unused, kept for interface consistency. + concurrent_requests : bool, optional + Unused, kept for interface consistency. + **creation_parameters : Any + Must include ``"gebco_path"`` pointing to the GEBCO data file. Returns ------- xr.Dataset - + Dataset with height variable on the cutout grid. """ if "gebco_path" not in creation_parameters: logger.error('Argument "gebco_path" not defined') path = creation_parameters["gebco_path"] coords = cutout.coords - # assign time dimension even if not used return ( get_data_gebco_height(coords["x"], coords["y"], path) .to_dataset() diff --git a/atlite/datasets/ncep.py b/atlite/datasets/ncep.py index 115e4d24..fae176b0 100644 --- a/atlite/datasets/ncep.py +++ b/atlite/datasets/ncep.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: MIT """ -Module containing specific operations for creating cutouts from the NCEP -dataset. +Module for creating cutouts from the NCEP dataset. DEPRECATED ---------- @@ -12,18 +11,44 @@ for the time being! """ +from __future__ import annotations + import glob import os +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd import xarray as xr -engine = "pynio" -crs = 4326 +if TYPE_CHECKING: + from collections.abc import Generator + + from atlite._types import PathLike + +engine: str = "pynio" +crs: int = 4326 + + +def convert_lons_lats_ncep( + ds: xr.Dataset, xs: slice | np.ndarray[Any, Any], ys: slice | np.ndarray[Any, Any] +) -> xr.Dataset: + """Select and rename NCEP longitude/latitude coordinates, handling wraparound. + Parameters + ---------- + ds : xr.Dataset + Dataset with ``lon_0`` and ``lat_0`` coordinates. + xs : slice or np.ndarray + Longitude selection range or array. + ys : slice or np.ndarray + Latitude selection range or array. -def convert_lons_lats_ncep(ds, xs, ys): + Returns + ------- + xr.Dataset + Dataset with coordinates renamed to ``x``/``y`` and ``lon``/``lat``. + """ if not isinstance(xs, slice): first, second, last = np.asarray(xs)[[0, 1, -1]] xs = slice(first - 0.1 * (second - first), last + 0.1 * (second - first)) @@ -50,12 +75,24 @@ def convert_lons_lats_ncep(ds, xs, ys): ds = ds.sel(lon_0=xs) ds = ds.rename({"lon_0": "x", "lat_0": "y"}) - ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) - return ds + return ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) + +def convert_time_hourly_ncep(ds: xr.Dataset, drop_time_vars: bool = True) -> xr.Dataset: + """Stack initial and forecast times into a single hourly time dimension. -def convert_time_hourly_ncep(ds, drop_time_vars=True): - # Combine initial_time0 and forecast_time0 + Parameters + ---------- + ds : xr.Dataset + Dataset with ``initial_time0_hours`` and ``forecast_time0`` dimensions. + drop_time_vars : bool, optional + Drop auxiliary time variables (default ``True``). + + Returns + ------- + xr.Dataset + Dataset with a unified ``time`` coordinate. + """ ds = ds.stack(time=("initial_time0_hours", "forecast_time0")).assign_coords( time=np.ravel( ds.coords["initial_time0_hours"] @@ -68,20 +105,36 @@ def convert_time_hourly_ncep(ds, drop_time_vars=True): return ds -def convert_unaverage_ncep(ds): - # the fields ending in _avg contain averages which have to be unaveraged by using - # \begin{equation} - # \tilde x_1 = x_1 \quad \tilde x_i = i \cdot x_i - (i - 1) \cdot x_{i-1} \quad \forall i > 1 - # \end{equation} +def convert_unaverage_ncep(ds: xr.Dataset) -> xr.Dataset: + r"""Convert running-average variables (``*_avg``) to instantaneous values. - def unaverage(da, dim="forecast_time0"): + The fields ending in ``_avg`` are averages over the forecast window which + have to be un-averaged by + + .. math:: + \tilde x_1 = x_1, \quad + \tilde x_i = i \cdot x_i - (i - 1) \cdot x_{i-1} \quad \forall i > 1. + + Parameters + ---------- + ds : xr.Dataset + Dataset with variables ending in ``_avg``. + + Returns + ------- + xr.Dataset + Dataset with un-averaged variables replacing the originals. + """ + + def unaverage(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: coords = da.coords[dim] y = da * xr.DataArray( np.arange(1, len(coords) + 1), dims=[dim], coords={dim: coords} ) - return y - y.shift(**{dim: 1}).fillna(0.0) + return y - y.shift({dim: 1}).fillna(0.0) for k, da in ds.items(): + assert isinstance(k, str) if k.endswith("_avg"): ds[k[: -len("_avg")]] = unaverage(da) ds = ds.drop(k) @@ -89,20 +142,35 @@ def unaverage(da, dim="forecast_time0"): return ds -def convert_unaccumulate_ncep(ds): - # the fields ending in _acc contain values that are accumulated over the - # forecast_time which have to be unaccumulated by using: - # \begin{equation} - # \tilde x_1 = x_1 - # \tilde x_i = x_i - x_{i-1} \forall 1 < i <= 6 - # \end{equation} - # Source: - # http://rda.ucar.edu/datasets/ds094.1/#docs/FAQs_hrly_timeseries.html +def convert_unaccumulate_ncep(ds: xr.Dataset) -> xr.Dataset: + r"""Convert accumulated variables (``*_acc``) to per-timestep values. + + The fields ending in ``_acc`` are accumulated over the forecast_time and + have to be un-accumulated by + + .. math:: + \tilde x_1 = x_1, \quad + \tilde x_i = x_i - x_{i-1} \quad \forall\, 1 < i \leq 6. + + Source: + http://rda.ucar.edu/datasets/ds094.1/#docs/FAQs_hrly_timeseries.html + + Parameters + ---------- + ds : xr.Dataset + Dataset with variables ending in ``_acc``. + + Returns + ------- + xr.Dataset + Dataset with de-accumulated variables replacing the originals. + """ - def unaccumulate(da, dim="forecast_time0"): - return da - da.shift(**{dim: 1}).fillna(0.0) + def unaccumulate(da: xr.DataArray, dim: str = "forecast_time0") -> xr.DataArray: + return da - da.shift({dim: 1}).fillna(0.0) for k, da in ds.items(): + assert isinstance(k, str) if k.endswith("_acc"): ds[k[: -len("_acc")]] = unaccumulate(da) ds = ds.drop(k) @@ -110,17 +178,58 @@ def unaccumulate(da, dim="forecast_time0"): return ds -def convert_clip_lower(ds, variable, a_min, value): - """ - Set values of `variable` that are below `a_min` to `value`. - - Similar to `numpy.clip`. +def convert_clip_lower( + ds: xr.Dataset, variable: str, a_min: float, value: float +) -> xr.Dataset: + """Replace values at or below a threshold with a fill value. + + Parameters + ---------- + ds : xr.Dataset + Input dataset. + variable : str + Name of the variable to clip. + a_min : float + Threshold; values <= ``a_min`` are replaced. + value : float + Replacement value. + + Returns + ------- + xr.Dataset + Dataset with clipped variable. """ ds[variable] = ds[variable].where(ds[variable] > a_min).fillna(value) return ds -def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_wnd10m_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare 10-m wind speed from NCEP U/V components. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``wnd10m`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -131,7 +240,33 @@ def prepare_wnd10m_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_influx_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_influx_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare downward shortwave radiation flux from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``influx`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) @@ -143,7 +278,33 @@ def prepare_influx_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_outflux_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_outflux_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare upward shortwave radiation flux from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``outflux`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_unaverage_ncep(ds) @@ -155,7 +316,33 @@ def prepare_outflux_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_temperature_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare 2-m air temperature from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``temperature`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -164,7 +351,33 @@ def prepare_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_soil_temperature_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare soil temperature from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``soil temperature`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = convert_time_hourly_ncep(ds) @@ -173,7 +386,33 @@ def prepare_soil_temperature_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_runoff_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare surface runoff from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``runoff`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) # runoff has missing values: set nans to 0 @@ -185,7 +424,33 @@ def prepare_runoff_ncep(fn, yearmonth, xs, ys, engine=engine): yield yearmonth, ds -def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): +def prepare_height_ncep( + fn: PathLike, + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare geopotential height from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs to yield the same height data for. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``height`` variable for each yearmonth. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"HGT_P0_L105_GGA0": "height"}) @@ -193,7 +458,33 @@ def prepare_height_ncep(fn, xs, ys, yearmonths, engine=engine): yield ym, ds -def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): +def prepare_roughness_ncep( + fn: PathLike, + yearmonth: tuple[int, int], + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + engine: str = engine, +) -> Generator[tuple[tuple[int, int], xr.Dataset], None, None]: + """Prepare surface roughness from NCEP data. + + Parameters + ---------- + fn : PathLike + Path to the GRIB2 file. + yearmonth : tuple of (int, int) + Year and month identifier. + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + engine : str, optional + xarray backend engine. + + Yields + ------ + tuple of (tuple of (int, int), xr.Dataset) + ``(yearmonth, dataset)`` with ``roughness`` variable. + """ with xr.open_dataset(fn, engine=engine) as ds: ds = convert_lons_lats_ncep(ds, xs, ys) ds = ds.rename({"SFCR_P8_L1_GGA0": "roughness"}) @@ -202,9 +493,42 @@ def prepare_roughness_ncep(fn, yearmonth, xs, ys, engine=engine): def prepare_meta_ncep( - xs, ys, year, month, template, height_config, module, engine=engine -): - fn = next(glob.iglob(template.format(year=year, month=month))) + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + year: int, + month: int, + template: str, + height_config: dict[str, Any], + module: Any, + engine: str = engine, +) -> xr.Dataset: + """Prepare cutout metadata including coordinates and height. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + year : int + Reference year. + month : int + Reference month. + template : str + Glob-able file path template with ``{year}`` and ``{month}`` placeholders. + height_config : dict + Configuration dict for height preparation (must include ``tasks_func``). + module : Any + Dataset module reference. + engine : str, optional + xarray backend engine. + + Returns + ------- + xr.Dataset + Metadata dataset with coordinates, time, and ``height``. + """ + fn = next(glob.iglob(template.format(year=year, month=month))) # noqa: PTH207 with xr.open_dataset(fn, engine=engine) as ds: ds = ds.coords.to_dataset() ds = convert_lons_lats_ncep(ds, xs, ys) @@ -227,107 +551,141 @@ def prepare_meta_ncep( return meta -def tasks_monthly_ncep(xs, ys, yearmonths, prepare_func, template, meta_attrs): +def tasks_monthly_ncep( + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + meta_attrs: dict[str, Any], +) -> list[dict[str, Any]]: + """Build per-month task dicts for NCEP data preparation. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs to create tasks for. + prepare_func : callable + Preparation function to invoke per task. + template : str + Glob-able file path template with ``{year}`` and ``{month}`` placeholders. + meta_attrs : dict + Metadata attributes (unused, kept for interface consistency). + + Returns + ------- + list of dict + Task dictionaries keyed for the preparation function. + """ return [ - dict( - prepare_func=prepare_func, - xs=xs, - ys=ys, - fn=next(glob.iglob(template.format(year=ym[0], month=ym[1]))), - engine=engine, - yearmonth=ym, - ) + { + "prepare_func": prepare_func, + "xs": xs, + "ys": ys, + "fn": next(glob.iglob(template.format(year=ym[0], month=ym[1]))), # noqa: PTH207 + "engine": engine, + "yearmonth": ym, + } for ym in yearmonths ] def tasks_height_ncep( - xs, ys, yearmonths, prepare_func, template, meta_attrs, **extra_args -): + xs: slice | np.ndarray[Any, Any], + ys: slice | np.ndarray[Any, Any], + yearmonths: list[tuple[int, int]], + prepare_func: Any, + template: str, + meta_attrs: dict[str, Any], + **extra_args: Any, +) -> list[dict[str, Any]]: + """Build a single task dict for NCEP height data preparation. + + Parameters + ---------- + xs : slice or np.ndarray + Longitude selection. + ys : slice or np.ndarray + Latitude selection. + yearmonths : list of tuple of (int, int) + Year-month pairs passed through to the preparation function. + prepare_func : callable + Preparation function to invoke. + template : str + Glob-able file path to the height data. + meta_attrs : dict + Metadata attributes (unused, kept for interface consistency). + **extra_args + Additional keyword arguments forwarded to the task dict. + + Returns + ------- + list of dict + Single-element list with the height task dictionary. + """ return [ dict( prepare_func=prepare_func, xs=xs, ys=ys, yearmonths=yearmonths, - fn=next(glob.iglob(template)), + fn=next(glob.iglob(template)), # noqa: PTH207 **extra_args, ) ] -weather_data_config = { - "influx": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_influx_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/dswsfc.*.grb2", - ), - ), - "outflux": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_outflux_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/uswsfc.*.grb2", - ), - ), - "temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/tmp2m.*.grb2", - ), - ), - "soil temperature": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_soil_temperature_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/soilt1.*.grb2", - ), - ), - "wnd10m": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_wnd10m_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/wnd10m.*.grb2", - ), - ), - "runoff": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_runoff_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/runoff.*.grb2", - ), - ), - "roughness": dict( - tasks_func=tasks_monthly_ncep, - prepare_func=prepare_roughness_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/flxf.gdas.*.grb2", - ), - ), - "height": dict( - tasks_func=tasks_height_ncep, - prepare_func=prepare_height_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "height/cdas1.20130101.splgrbanl.grb2", - ), - ), +ncep_dir = os.environ["ATLITE_NCEP_DIR"] + +weather_data_config: dict[str, dict[str, Any]] = { + "influx": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_influx_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/dswsfc.*.grb2"), + }, + "outflux": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_outflux_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/uswsfc.*.grb2"), + }, + "temperature": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_temperature_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2"), + }, + "soil temperature": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_soil_temperature_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/soilt1.*.grb2"), + }, + "wnd10m": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_wnd10m_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/wnd10m.*.grb2"), + }, + "runoff": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_runoff_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/runoff.*.grb2"), + }, + "roughness": { + "tasks_func": tasks_monthly_ncep, + "prepare_func": prepare_roughness_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/flxf.gdas.*.grb2"), + }, + "height": { + "tasks_func": tasks_height_ncep, + "prepare_func": prepare_height_ncep, + "template": os.path.join(ncep_dir, "height/cdas1.20130101.splgrbanl.grb2"), + }, } -meta_data_config = dict( - prepare_func=prepare_meta_ncep, - template=os.path.join( - config.ncep_dir, # noqa: F821 - "{year}{month:0>2}/tmp2m.*.grb2", - ), - height_config=weather_data_config["height"], -) +meta_data_config: dict[str, Any] = { + "prepare_func": prepare_meta_ncep, + "template": os.path.join(ncep_dir, "{year}{month:0>2}/tmp2m.*.grb2"), + "height_config": weather_data_config["height"], +} diff --git a/atlite/datasets/sarah.py b/atlite/datasets/sarah.py index ec7851ff..4c43ceee 100644 --- a/atlite/datasets/sarah.py +++ b/atlite/datasets/sarah.py @@ -1,16 +1,15 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module containing specific operations for creating cutouts from the SARAH2 -dataset. -""" +"""Module for creating cutouts from the SARAH2 dataset.""" + +from __future__ import annotations -import glob import logging -import os import warnings from functools import partial +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast import numpy as np import pandas as pd @@ -20,6 +19,9 @@ from atlite.gis import regrid from atlite.pv.solar_position import SolarPosition +if TYPE_CHECKING: + from atlite._types import PathLike + logger = logging.getLogger(__name__) @@ -36,31 +38,30 @@ "solar_azimuth", ], } -static_features = {} +static_features: dict[str, list[str]] = {} -def get_filenames(sarah_dir, coords): +def get_filenames(sarah_dir: str | PathLike, coords: dict[str, Any]) -> pd.DataFrame: """ - Get all files in directory `sarah_dir` relevent for coordinates `coords`. - - This function parses all files in the sarah directory which lay in the time - span of the coordinates. + Build a DataFrame of SIS and SID file paths for the requested time range. Parameters ---------- - sarah_dir : str - coords : atlite.Cutout.coords + sarah_dir : str or PathLike + Root directory containing SARAH NetCDF files. + coords : dict + Coordinate mapping with a ``"time"`` key whose index defines the + requested temporal range. Returns ------- - pd.DataFrame with two columns `sis` and `sid` for and timeindex for all - relevant files. - + pd.DataFrame + DataFrame with columns ``"sis"`` and ``"sid"`` indexed by date. """ - def _filenames_starting_with(name): - pattern = os.path.join(sarah_dir, "**", f"{name}*.nc") - files = pd.Series(glob.glob(pattern, recursive=True)) + def _filenames_starting_with(name: str) -> pd.Series[str]: + pattern = str(Path(sarah_dir) / "**" / f"{name}*.nc") + files = pd.Series([str(f) for f in Path(sarah_dir).rglob(f"{name}*.nc")]) assert not files.empty, ( f"No files found at {pattern}. Make sure " f"sarah_dir points to the correct directory!" @@ -70,48 +71,62 @@ def _filenames_starting_with(name): return files.sort_index() files = pd.concat( - dict(sis=_filenames_starting_with("SIS"), sid=_filenames_starting_with("SID")), + { + "sis": _filenames_starting_with("SIS"), + "sid": _filenames_starting_with("SID"), + }, join="inner", axis=1, ) - # SARAH files are named based on day, need to .floor("D") to compare correctly start = coords["time"].to_index()[0].floor("D") end = coords["time"].to_index()[-1].floor("D") if (start < files.index[0]) or (end > files.index[-1]): logger.error( - f"Files in {sarah_dir} do not cover the whole time span:" - f"\t{start} until {end}" + "Files in %s do not cover the whole time span:\t%s until %s", + sarah_dir, + start, + end, ) return files.loc[(files.index >= start) & (files.index <= end)].sort_index() -def interpolate(ds, dim="time"): +def interpolate( + ds: xr.Dataset | xr.DataArray, dim: str = "time" +) -> xr.Dataset | xr.DataArray: """ - Interpolate NaNs in a dataset along a chunked dimension. + Linearly interpolate NaN values along a dimension. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Input data with potential NaN gaps. + dim : str, default "time" + Dimension along which to interpolate. - This function is similar to xr.Dataset.interpolate_na but can be - used for interpolating along a chunked dimensions (default 'time''). - As the sarah data has mulitple NaN's in the areas of dawn and - nightfall and the data is per default chunked along the time axis, - use this function to interpolate. + Returns + ------- + xr.Dataset or xr.DataArray + Data with NaN values filled by linear interpolation. """ - def _interpolate1d(y): + def _interpolate1d( + y: np.ndarray[Any, np.dtype[np.floating[Any]]], + ) -> np.ndarray[Any, np.dtype[np.floating[Any]]]: nan = np.isnan(y) if nan.all() or not nan.any(): return y - def x(z): + def x(z: np.ndarray[Any, np.dtype[Any]]) -> np.ndarray[Any, np.dtype[np.intp]]: return z.nonzero()[0] y = np.array(y) y[nan] = np.interp(x(nan), x(~nan), y[~nan]) return y - def _interpolate(a): + def _interpolate(a: Any) -> Any: return a.map_blocks( partial(np.apply_along_axis, _interpolate1d, -1), dtype=a.dtype ) @@ -120,7 +135,7 @@ def _interpolate(a): dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "interpolate only supports datasets with homogeneous dtype" - return xr.apply_ufunc( + result: xr.Dataset | xr.DataArray = xr.apply_ufunc( _interpolate, ds, input_core_dims=[[dim]], @@ -130,24 +145,44 @@ def _interpolate(a): dask="allowed", keep_attrs=True, ) + return result -def as_slice(bounds, pad=True): +def as_slice(bounds: slice | tuple[float, float], pad: bool = True) -> slice: """ - Convert coordinate bounds to slice and pad by 0.01. + Convert coordinate bounds to a slice, optionally with small padding. + + Parameters + ---------- + bounds : slice or tuple of float + Existing slice or ``(start, stop)`` tuple. + pad : bool, default True + If *bounds* is a tuple, add ±0.01 padding. + + Returns + ------- + slice + Slice suitable for ``.sel()`` indexing. """ if not isinstance(bounds, slice): - bounds = bounds + (-0.01, 0.01) + bounds = bounds + (-0.01, 0.01) # type: ignore[assignment] bounds = slice(*bounds) return bounds -def hourly_mean(ds): +def hourly_mean(ds: xr.Dataset) -> xr.Dataset: """ - Resample time data to one hour frequency. + Compute hourly means from 30-minute data by averaging consecutive pairs. - In contrast to the standard xarray resample function this preserves - chunks sizes along the time dimension. + Parameters + ---------- + ds : xr.Dataset + Dataset with 30-minute temporal resolution. + + Returns + ------- + xr.Dataset + Dataset resampled to hourly resolution. """ ds1 = ds.isel(time=slice(None, None, 2)) ds2 = ds.isel(time=slice(1, None, 2)) @@ -160,39 +195,37 @@ def hourly_mean(ds): def get_data( - cutout, feature, tmpdir, lock=None, monthly_requests=False, **creation_parameters -): + cutout: Any, + feature: str, + tmpdir: PathLike, + lock: Any = None, + monthly_requests: bool = False, + **creation_parameters: Any, +) -> xr.Dataset: """ - Load stored SARAH data and reformat to matching the given cutout. - - This function loads and resamples the stored SARAH data for a given - `atlite.Cutout`. + Retrieve and process SARAH solar irradiance data for a cutout. Parameters ---------- - cutout : atlite.Cutout + cutout : Cutout + Target cutout defining the spatial and temporal extent. feature : str - Name of the feature data to retrieve. Must be in - `atlite.datasets.sarah.features` - monthly_requests : bool - Takes no effect, only here for consistency with other dataset modules. - concurrent_requests : bool - Takes no effect, only here for consistency with other dataset modules. - **creation_parameters : - Mandatory arguments are: - * 'sarah_dir', str. Directory of the stored SARAH data. - Possible arguments are: - * 'parallel', bool. Whether to load stored files in parallel - mode. Default is False. - * 'sarah_interpolate', bool. Whether to interpolate areas of dawn - and nightfall. This might slow down the loading process if only - a few cores are available. Default is True. + Feature name (e.g. ``"influx"``). + tmpdir : PathLike + Temporary directory for intermediate files. + lock : lock-like, optional + Lock for thread-safe file access. + monthly_requests : bool, default False + Whether to split requests by month. + **creation_parameters + Must include ``sarah_dir``. Optional keys: ``parallel`` (bool), + ``sarah_interpolate`` (bool). Returns ------- - xarray.Dataset - Dataset of dask arrays of the retrieved variables. - + xr.Dataset + Dataset with ``influx_direct``, ``influx_diffuse``, + ``solar_altitude``, and ``solar_azimuth`` variables. """ assert cutout.dt in ("30min", "30T", "h", "1h") @@ -204,41 +237,39 @@ def get_data( creation_parameters.setdefault("sarah_interpolate", True) files = get_filenames(sarah_dir, coords) - open_kwargs = dict(chunks=chunks, parallel=creation_parameters["parallel"]) + open_kwargs = {"chunks": chunks, "parallel": creation_parameters["parallel"]} ds_sis = xr.open_mfdataset(files.sis, combine="by_coords", **open_kwargs)[["SIS"]] ds_sid = xr.open_mfdataset(files.sid, combine="by_coords", **open_kwargs)[["SID"]] ds = xr.merge([ds_sis, ds_sid]) ds = ds.sel(lon=as_slice(cutout.extent[:2]), lat=as_slice(cutout.extent[2:])) - # fix float (im)precission ds = ds.assign_coords( lon=ds.lon.astype(float).round(4), lat=ds.lat.astype(float).round(4) ) - # Interpolate, resample and possible regrid - if creation_parameters["sarah_interpolate"]: - ds = interpolate(ds) - else: - ds = ds.fillna(0) + ds = cast( + "xr.Dataset", + interpolate(ds) if creation_parameters["sarah_interpolate"] else ds.fillna(0), + ) if cutout.dt not in ["30min", "30T"]: ds = hourly_mean(ds) if (cutout.dx != dx) or (cutout.dy != dy): - ds = regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average) + ds = cast( + "xr.Dataset", + regrid(ds, coords["lon"], coords["lat"], resampling=Resampling.average), + ) - dif_attrs = dict(long_name="Surface Diffuse Shortwave Flux", units="W m-2") + dif_attrs = {"long_name": "Surface Diffuse Shortwave Flux", "units": "W m-2"} ds["influx_diffuse"] = (ds["SIS"] - ds["SID"]).assign_attrs(**dif_attrs) ds = ds.rename({"SID": "influx_direct"}).drop_vars("SIS") ds = ds.assign_coords(x=ds.coords["lon"], y=ds.coords["lat"]) ds = ds.swap_dims({"lon": "x", "lat": "y"}) - # Do not show DeprecationWarning from new SolarPosition calculation (#199) with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) sp = SolarPosition(ds, time_shift="0H") sp = sp.rename({v: f"solar_{v}" for v in sp.data_vars}) - ds = xr.merge([ds, sp]) - - return ds + return xr.merge([ds, sp]) diff --git a/atlite/gis.py b/atlite/gis.py index 0d76eef8..cd45f85f 100644 --- a/atlite/gis.py +++ b/atlite/gis.py @@ -1,14 +1,15 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for Geographic Information System. -""" +"""Functions for Geographic Information System.""" + +from __future__ import annotations import logging import multiprocessing as mp from collections import OrderedDict from pathlib import Path +from typing import TYPE_CHECKING, Any, cast from warnings import catch_warnings, simplefilter import geopandas as gpd @@ -30,78 +31,127 @@ from shapely.strtree import STRtree from tqdm import tqdm +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + + from matplotlib.axes import Axes + from shapely.geometry.base import BaseGeometry + + from atlite._types import ( + CrsLike, + NDArray, + PathLike, + ) + logger = logging.getLogger(__name__) -def get_coords(x, y, time, dx=0.25, dy=0.25, dt="h", **kwargs): +def get_coords( + x: slice, + y: slice, + time: slice, + dx: float = 0.25, + dy: float = 0.25, + dt: str = "h", + **kwargs: Any, +) -> xr.Dataset: """ - Create an cutout coordinate system on the basis of slices and step sizes. + Create cutout coordinates from slices and resolutions. Parameters ---------- x : slice - Numerical slices with lower and upper bound of the x dimension. + Bounds of the x dimension. y : slice - Numerical slices with lower and upper bound of the y dimension. + Bounds of the y dimension. time : slice - Slice with strings with lower and upper bound of the time dimension. + Bounds of the time dimension. dx : float, optional - Step size of the x coordinate. The default is 0.25. + Step size of the x coordinate. dy : float, optional - Step size of the y coordinate. The default is 0.25. + Step size of the y coordinate. dt : str, optional - Frequency of the time coordinate. The default is 'h'. Valid are all - pandas offset aliases. + Frequency of the time coordinate. + **kwargs + Unused keyword arguments. Returns ------- - ds : xarray.Dataset - Dataset with x, y and time variables, representing the whole coordinate - system. - + xarray.Dataset + Dataset containing ``x``, ``y``, and ``time`` coordinates. """ x = slice(*sorted([x.start, x.stop])) y = slice(*sorted([y.start, y.stop])) - ds = xr.Dataset( - { - "x": np.round(np.arange(-180, 180, dx), 9), - "y": np.round(np.arange(-90, 90, dy), 9), - "time": pd.date_range(start="1940", end="now", freq=dt), - } - ) + ds = xr.Dataset({ + "x": np.round(np.arange(-180, 180, dx), 9), + "y": np.round(np.arange(-90, 90, dy), 9), + "time": pd.date_range(start="1940", end="now", freq=dt), + }) ds = ds.assign_coords(lon=ds.coords["x"], lat=ds.coords["y"]) ds = ds.sel(x=x, y=y, time=time) - return ds + return cast("xr.Dataset", ds) -def spdiag(v): +def spdiag(v: NDArray | Sequence[float]) -> sp.sparse.csr_matrix: """ - Create a sparse diagonal matrix from a 1-dimensional array. + Create a sparse diagonal matrix. + + Parameters + ---------- + v : array-like + Values placed on the diagonal. + + Returns + ------- + scipy.sparse.csr_matrix + Sparse diagonal matrix with ``v`` on the diagonal. """ N = len(v) inds = np.arange(N + 1, dtype=np.int32) return sp.sparse.csr_matrix((v, inds[:-1], inds), (N, N)) -def reproject_shapes(shapes, crs1, crs2): +def reproject_shapes( + shapes: Iterable[BaseGeometry] | pd.Series | dict[Any, BaseGeometry], + crs1: CrsLike, + crs2: CrsLike, +) -> Iterable[BaseGeometry] | pd.Series | OrderedDict[Any, BaseGeometry]: """ - Project a collection of shapes from one crs to another. + Reproject a collection of geometries. + + Parameters + ---------- + shapes : iterable, pandas.Series, or dict + Shapes to reproject. + crs1 : any + Source coordinate reference system. + crs2 : any + Target coordinate reference system. + + Returns + ------- + iterable, pandas.Series, or collections.OrderedDict + Reprojected shapes with the same container type where applicable. """ transformer = Transformer.from_crs(crs1, crs2, always_xy=True) - def _reproject_shape(shape): + def _reproject_shape(shape: BaseGeometry) -> BaseGeometry: return transform(transformer.transform, shape) if isinstance(shapes, pd.Series): return shapes.map(_reproject_shape) - elif isinstance(shapes, dict): + if isinstance(shapes, dict): return OrderedDict((k, _reproject_shape(v)) for k, v in shapes.items()) - else: - return list(map(_reproject_shape, shapes)) + return list(map(_reproject_shape, shapes)) -def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): +def compute_indicatormatrix( + orig: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], + dest: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], + orig_crs: CrsLike = 4326, + dest_crs: CrsLike = 4326, +) -> sp.sparse.lil_matrix: """ Compute the indicatormatrix. @@ -117,7 +167,13 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): Parameters ---------- orig : Collection of shapely polygons + Origin polygons. dest : Collection of shapely polygons + Destination polygons. + orig_crs : int or CRS, default 4326 + CRS of the origin polygons. + dest_crs : int or CRS, default 4326 + CRS of the destination polygons. Returns ------- @@ -128,15 +184,21 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): orig = orig.geometry if isinstance(orig, gpd.GeoDataFrame) else orig dest = dest.geometry if isinstance(dest, gpd.GeoDataFrame) else dest dest = reproject_shapes(dest, dest_crs, orig_crs) - indicator = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float) - tree = STRtree(orig) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig)) + orig_list: list[Any] | pd.Series = ( + list(orig) if not isinstance(orig, pd.Series) else orig + ) + dest_list: list[Any] | pd.Series = ( + list(dest) if not isinstance(dest, pd.Series) else dest + ) + indicator = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) + tree = STRtree(orig_list) + idx = {hash(o.wkt): i for i, o in enumerate(orig_list)} - for i, d in enumerate(dest): + for i, d in enumerate(dest_list): for o in tree.query(d): # STRtree query returns a list of indices for shapely >= v2.0 if isinstance(o, (int | np.integer)): - o = orig[o] + o = orig_list[o] if o.intersects(d): j = idx[hash(o.wkt)] area = d.intersection(o).area @@ -145,7 +207,12 @@ def compute_indicatormatrix(orig, dest, orig_crs=4326, dest_crs=4326): return indicator -def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): +def compute_intersectionmatrix( + orig: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], + dest: gpd.GeoDataFrame | gpd.GeoSeries | Iterable[BaseGeometry], + orig_crs: CrsLike = 4326, + dest_crs: CrsLike = 4326, +) -> sp.sparse.lil_matrix: """ Compute the intersectionmatrix. @@ -157,7 +224,13 @@ def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): Parameters ---------- orig : Collection of shapely polygons + Origin polygons. dest : Collection of shapely polygons + Destination polygons. + orig_crs : int or CRS, default 4326 + CRS of the origin polygons. + dest_crs : int or CRS, default 4326 + CRS of the destination polygons. Returns ------- @@ -168,25 +241,44 @@ def compute_intersectionmatrix(orig, dest, orig_crs=4326, dest_crs=4326): orig = orig.geometry if isinstance(orig, gpd.GeoDataFrame) else orig dest = dest.geometry if isinstance(dest, gpd.GeoDataFrame) else dest dest = reproject_shapes(dest, dest_crs, orig_crs) - intersection = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float) - tree = STRtree(orig) - idx = dict((hash(o.wkt), i) for i, o in enumerate(orig)) + orig_list: list[Any] | pd.Series = ( + list(orig) if not isinstance(orig, pd.Series) else orig + ) + dest_list: list[Any] | pd.Series = ( + list(dest) if not isinstance(dest, pd.Series) else dest + ) + intersection = sp.sparse.lil_matrix((len(dest_list), len(orig_list)), dtype=float) + tree = STRtree(orig_list) + idx = {hash(o.wkt): i for i, o in enumerate(orig_list)} - for i, d in enumerate(dest): + for i, d in enumerate(dest_list): for o in tree.query(d): # STRtree query returns a list of indices for shapely >= v2.0 if isinstance(o, (int | np.integer)): - o = orig[o] + o = orig_list[o] j = idx[hash(o.wkt)] intersection[i, j] = o.intersects(d) return intersection -def padded_transform_and_shape(bounds, res): +def padded_transform_and_shape( + bounds: tuple[float, float, float, float], res: float +) -> tuple[rio.Affine, tuple[int, int]]: """ - Get the (transform, shape) tuple of a raster with resolution `res` and - bounds `bounds`. + Return a padded raster transform and shape. + + Parameters + ---------- + bounds : tuple + Bounding box as ``(left, bottom, right, top)``. + res : float + Raster resolution. + + Returns + ------- + tuple + Affine transform and raster shape covering the padded bounds. """ left, bottom = ((b // res) * res for b in bounds[:2]) right, top = ((b // res + 1) * res for b in bounds[2:]) @@ -195,10 +287,38 @@ def padded_transform_and_shape(bounds, res): def projected_mask( - raster, geom, transform=None, shape=None, crs=None, allow_no_overlap=False, **kwargs -): + raster: rio.DatasetReader, + geom: gpd.GeoSeries, + transform: rio.Affine | None = None, + shape: tuple[int, int] | None = None, + crs: CrsLike | None = None, + allow_no_overlap: bool = False, + **kwargs: Any, +) -> tuple[NDArray, rio.Affine]: """ - Load a mask and optionally project it to target resolution and shape. + Load a raster mask and optionally reproject it. + + Parameters + ---------- + raster : rasterio.DatasetReader + Raster source used to build the mask. + geom : geopandas.GeoSeries + Geometry used for masking. + transform : rasterio.Affine, optional + Target transform. + shape : tuple, optional + Target array shape. + crs : any, optional + Target coordinate reference system. + allow_no_overlap : bool, optional + Whether to return a nodata mask when geometry and raster do not overlap. + **kwargs + Additional keyword arguments passed to ``rasterio.mask.mask``. + + Returns + ------- + tuple + Masked array and its affine transform. """ nodata = kwargs.get("nodata", 255) kwargs.setdefault("indexes", 1) @@ -219,7 +339,7 @@ def projected_mask( return masked, transform_ assert shape is not None and crs is not None - return rio.warp.reproject( + return rio.warp.reproject( # type: ignore[no-any-return] masked, empty(shape), src_crs=raster.crs, @@ -230,19 +350,46 @@ def projected_mask( ) -def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): +def pad_extent( + src: NDArray, + src_transform: rio.Affine, + dst_transform: rio.Affine, + src_crs: CrsLike, + dst_crs: CrsLike, + **kwargs: Any, +) -> tuple[NDArray, rio.Affine]: """ Pad the extent of `src` by an equivalent of one cell of the target raster. - This ensures that the array is large enough to not be treated as - nodata in all cells of the destination raster. If src.ndim > 2, the - function expects the last two dimensions to be y,x. Additional - keyword arguments are used in `np.pad()`. + This ensures that the array is large enough to not be treated as nodata in + all cells of the destination raster. If ``src.ndim > 2``, the function + expects the last two dimensions to be ``y, x``. + + Parameters + ---------- + src : numpy.ndarray + Source array with spatial axes in the last two dimensions. + src_transform : rasterio.Affine + Transform of the source array. + dst_transform : rasterio.Affine + Transform of the destination raster. + src_crs : any + Source coordinate reference system. + dst_crs : any + Destination coordinate reference system. + **kwargs + Keyword arguments passed to ``numpy.pad``. + + Returns + ------- + tuple + Padded array and updated affine transform. """ if src.size == 0: return src, src_transform - left, top, right, bottom = *(src_transform * (0, 0)), *(src_transform * (1, 1)) + left, top = src_transform * (0, 0) + right, bottom = src_transform * (1, 1) covered = transform_bounds(src_crs, dst_crs, left, bottom, right, top) covered_res = min(abs(covered[2] - covered[0]), abs(covered[3] - covered[1])) pad = int(dst_transform[0] // covered_res * 1.1) @@ -250,7 +397,7 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): kwargs.setdefault("mode", "constant") if src.ndim == 2: - return rio.pad(src, src_transform, pad, **kwargs) + return rio.pad(src, src_transform, pad, **kwargs) # type: ignore[no-any-return] npad = ((0, 0),) * (src.ndim - 2) + ((pad, pad), (pad, pad)) padded = np.pad(src, npad, **kwargs) @@ -260,7 +407,9 @@ def pad_extent(src, src_transform, dst_transform, src_crs, dst_crs, **kwargs): return padded, rio.Affine(*transform[:6]) -def shape_availability(geometry, excluder): +def shape_availability( + geometry: gpd.GeoSeries, excluder: ExclusionContainer +) -> tuple[NDArray, rio.Affine]: """ Compute the eligible area in one or more geometries. @@ -268,7 +417,7 @@ def shape_availability(geometry, excluder): ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. excluder : atlite.gis.ExclusionContainer Container of all meta data or objects which to exclude, i.e. @@ -278,7 +427,7 @@ def shape_availability(geometry, excluder): ------- masked : np.array Mask whith eligible raster cells indicated by 1 and excluded cells by 0. - transform : rasterion.Affine + transform : rasterio.Affine Affine transform of the mask. """ @@ -326,43 +475,45 @@ def shape_availability(geometry, excluder): def shape_availability_reprojected( - geometry, excluder, dst_transform, dst_crs, dst_shape -): + geometry: gpd.GeoSeries, + excluder: ExclusionContainer, + dst_transform: rio.Affine, + dst_crs: CrsLike, + dst_shape: tuple[int, int], +) -> tuple[NDArray, rio.Affine]: """ Compute and reproject the eligible area of one or more geometries. - The function executes `shape_availability` and reprojects the calculated - mask onto a new raster defined by (dst_transform, dst_crs, dst_shape). - Before reprojecting, the function pads the mask such all non-nodata data - points are projected in full cells of the target raster. The ensures that - all data within the mask are projected correclty (GDAL inherent 'problem'). + The function executes ``shape_availability`` and reprojects the calculated + mask onto a new raster defined by ``(dst_transform, dst_crs, dst_shape)``. + Before reprojecting, the function pads the mask such that all non-nodata + data points are projected in full cells of the target raster. This ensures + that all data within the mask are projected correctly (GDAL inherent + 'problem'). + Parameters ---------- - geometry : geopandas.Series - Geometry in which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is - computed. + geometry : geopandas.GeoSeries + Geometry for which availability is computed. excluder : atlite.gis.ExclusionContainer - Container of all meta data or objects which to exclude, i.e. - rasters and geometries. + Exclusion container defining masked areas. dst_transform : rasterio.Affine Transform of the target raster. - dst_crs : rasterio.CRS/proj.CRS - CRS of the target raster. + dst_crs : any + Coordinate reference system of the target raster. dst_shape : tuple Shape of the target raster. - masked : np.array - Average share of available area per grid cell. 0 indicates excluded, - 1 is fully included. - transform : rasterio.Affine - Affine transform of the mask. + Returns + ------- + tuple + Reprojected availability array and destination transform. """ masked, transform = shape_availability(geometry, excluder) masked, transform = pad_extent( masked, transform, dst_transform, excluder.crs, dst_crs ) - return rio.warp.reproject( + return rio.warp.reproject( # type: ignore[no-any-return] masked.astype(np.uint8), empty(dst_shape), resampling=rio.warp.Resampling.average, @@ -374,11 +525,9 @@ def shape_availability_reprojected( class ExclusionContainer: - """ - Container for exclusion objects and meta data. - """ + """Container for exclusion objects and meta data.""" - def __init__(self, crs=3035, res=100): + def __init__(self, crs: CrsLike = 3035, res: float = 100) -> None: """ Initialize a container for excluded areas. @@ -394,21 +543,25 @@ def __init__(self, crs=3035, res=100): The default is 100. """ - self.rasters = [] - self.geometries = [] - self.crs = crs - self.res = res + self.rasters: list[dict[str, Any]] = [] + self.geometries: list[dict[str, Any]] = [] + self.crs: CrsLike = crs + self.res: float = res def add_raster( self, - raster, - codes=None, - buffer=0, - invert=False, - nodata=255, - allow_no_overlap=False, - crs=None, - ): + raster: PathLike | rio.DatasetReader, + codes: int + | list[int] + | Sequence[int] + | Callable[[NDArray], NDArray] + | None = None, + buffer: float = 0, + invert: bool = False, + nodata: int = 255, + allow_no_overlap: bool = False, + crs: CrsLike | None = None, + ) -> None: """ Register a raster to the ExclusionContainer. @@ -426,6 +579,8 @@ def add_raster( Buffer around the excluded areas in units of ExclusionContainer.crs. Use this to create a buffer around the excluded/included area. The default is 0. + nodata : int, optional + Value to use for nodata pixels. The default is 255. invert : bool, optional Whether to exclude (False) or include (True) the specified areas of the raster. The default is False. @@ -437,18 +592,23 @@ def add_raster( CRS of the raster. Specify this if the raster has invalid crs. """ - d = dict( - raster=raster, - codes=codes, - buffer=buffer, - invert=invert, - nodata=nodata, - allow_no_overlap=allow_no_overlap, - crs=crs, - ) + d: dict[str, Any] = { + "raster": raster, + "codes": codes, + "buffer": buffer, + "invert": invert, + "nodata": nodata, + "allow_no_overlap": allow_no_overlap, + "crs": crs, + } self.rasters.append(d) - def add_geometry(self, geometry, buffer=0, invert=False): + def add_geometry( + self, + geometry: PathLike | gpd.GeoDataFrame | gpd.GeoSeries, + buffer: float = 0, + invert: bool = False, + ) -> None: """ Register a collection of geometries to the ExclusionContainer. @@ -464,12 +624,17 @@ def add_geometry(self, geometry, buffer=0, invert=False): of the geometries. The default is False. """ - d = dict(geometry=geometry, buffer=buffer, invert=invert) + d: dict[str, Any] = {"geometry": geometry, "buffer": buffer, "invert": invert} self.geometries.append(d) - def open_files(self): + def open_files(self) -> None: """ Open rasters and load geometries. + + Raises + ------ + ValueError + If a raster has an invalid CRS and none is provided. """ for d in self.rasters: raster = d["raster"] @@ -506,24 +671,27 @@ def open_files(self): d["geometry"] = geometry @property - def all_closed(self): - """ - Check whether all files in the raster container are closed. - """ + def all_closed(self) -> bool: + """Check whether all files in the raster container are closed.""" return all(isinstance(d["raster"], (str | Path)) for d in self.rasters) and all( isinstance(d["geometry"], (str | Path)) for d in self.geometries ) @property - def all_open(self): - """ - Check whether all files in the raster container are open. - """ + def all_open(self) -> bool: + """Check whether all files in the raster container are open.""" return all( isinstance(d["raster"], rio.DatasetReader) for d in self.rasters ) and all(isinstance(d["geometry"], gpd.GeoSeries) for d in self.geometries) - def __repr__(self): + def __repr__(self) -> str: + """Return string representation of the exclusion container. + + Returns + ------- + str + Human-readable summary of the exclusion container. + """ return ( f"Exclusion Container" f"\n registered rasters: {len(self.rasters)} " @@ -532,17 +700,20 @@ def __repr__(self): ) def compute_shape_availability( - self, geometry, dst_transform=None, dst_crs=None, dst_shape=None - ): + self, + geometry: gpd.GeoDataFrame | gpd.GeoSeries, + dst_transform: rio.Affine | None = None, + dst_crs: CrsLike | None = None, + dst_shape: tuple[int, int] | None = None, + ) -> tuple[NDArray, rio.Affine]: """ - Compute the eligible area in one or more geometries and optionally - reproject. + Compute the eligible area in one or more geometries. Parameters ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. dst_transform : rasterio.Affine Transform of the target raster. Define if the availability @@ -558,9 +729,14 @@ def compute_shape_availability( ------- masked : np.array Mask whith eligible raster cells indicated by 1 and excluded cells by 0. - transform : rasterion.Affine + transform : rasterio.Affine Affine transform of the mask. + Raises + ------ + ValueError + If only some of ``dst_transform``, ``dst_crs``, ``dst_shape`` are given. + """ if isinstance(geometry, gpd.GeoDataFrame): geometry = geometry.geometry @@ -576,40 +752,44 @@ def compute_shape_availability( "Arguments dst_transform, dst_crs, dst_shape " "should be all None or all defined." ) + assert ( + dst_transform is not None + and dst_crs is not None + and dst_shape is not None + ) return shape_availability_reprojected( geometry, self, dst_transform, dst_crs, dst_shape ) - else: - return shape_availability(geometry, self) + return shape_availability(geometry, self) def plot_shape_availability( self, - geometry, - ax=None, - set_title=True, - dst_transform=None, - dst_crs=None, - dst_shape=None, - show_kwargs={}, - plot_kwargs={}, - ): + geometry: gpd.GeoDataFrame | gpd.GeoSeries, + ax: Axes | None = None, + set_title: bool = True, + dst_transform: rio.Affine | None = None, + dst_crs: CrsLike | None = None, + dst_shape: tuple[int, int] | None = None, + show_kwargs: dict[str, Any] | None = None, + plot_kwargs: dict[str, Any] | None = None, + ) -> Axes: """ - Plot the eligible area for one or more geometries and optionally - reproject. + Plot the eligible area for one or more geometries. This function uses its own default values for ``rasterio.plot.show`` and ``geopandas.GeoSeries.plot``. Therefore eligible land is drawn in green - Note that this funtion will likely fail if another CRS than the one of the + Note that this function will likely fail if another CRS than the one of the ExclusionContainer is used in the axis (e.g. cartopy projections). Parameters ---------- geometry : geopandas.Series Geometry of which the eligible area is computed. If the series contains - more than one geometry, the eligble area of the combined geometries is + more than one geometry, the eligible area of the combined geometries is computed. - ax : matplotlib Axis, optional - set_title: boolean, optional + ax : matplotlib.axes.Axes, optional + Axes to plot on. If None, a new figure is created. + set_title : boolean, optional Whether to set the title with additional information on the share of eligible land. dst_transform : rasterio.Affine @@ -628,10 +808,14 @@ def plot_shape_availability( Returns ------- - _type_ - _description_ + matplotlib.axes.Axes + Axes with the plotted availability. """ + if show_kwargs is None: + show_kwargs = {} + if plot_kwargs is None: + plot_kwargs = {} import matplotlib.pyplot as plt if isinstance(geometry, gpd.GeoDataFrame): @@ -658,22 +842,43 @@ def plot_shape_availability( return ax -def _init_process(shapes_, excluder_, dst_transform_, dst_crs_, dst_shapes_): - global shapes, excluder, dst_transform, dst_crs, dst_shapes - shapes, excluder = shapes_, excluder_ - dst_transform, dst_crs, dst_shapes = dst_transform_, dst_crs_, dst_shapes_ +_mp_shapes: gpd.GeoSeries +_mp_excluder: ExclusionContainer +_mp_dst_transform: rio.Affine +_mp_dst_crs: CrsLike +_mp_dst_shapes: tuple[int, int] + + +def _init_process( + shapes_: gpd.GeoSeries, + excluder_: ExclusionContainer, + dst_transform_: rio.Affine, + dst_crs_: CrsLike, + dst_shapes_: tuple[int, int], +) -> None: + global _mp_shapes, _mp_excluder, _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes + _mp_shapes, _mp_excluder = shapes_, excluder_ + _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes = ( + dst_transform_, + dst_crs_, + dst_shapes_, + ) -def _process_func(i): - args = (excluder, dst_transform, dst_crs, dst_shapes) +def _process_func(i: Any) -> NDArray: + args = (_mp_excluder, _mp_dst_transform, _mp_dst_crs, _mp_dst_shapes) with catch_warnings(): simplefilter("ignore") - return shape_availability_reprojected(shapes.loc[[i]], *args)[0] + return shape_availability_reprojected(_mp_shapes.loc[[i]], *args)[0] def compute_availabilitymatrix( - cutout, shapes, excluder, nprocesses=None, disable_progressbar=True -): + cutout: Any, + shapes: gpd.GeoDataFrame | gpd.GeoSeries, + excluder: ExclusionContainer, + nprocesses: int | None = None, + disable_progressbar: bool = True, +) -> xr.DataArray: """ Compute the eligible share within cutout cells in the overlap with shapes. @@ -719,12 +924,12 @@ def compute_availabilitymatrix( shapes = shapes.to_crs(excluder.crs) args = (excluder, cutout.transform_r, cutout.crs, cutout.shape) - tqdm_kwargs = dict( - ascii=False, - unit=" gridcells", - total=len(shapes), - desc="Compute availability matrix", - ) + tqdm_kwargs = { + "ascii": False, + "unit": " gridcells", + "total": len(shapes), + "desc": "Compute availability matrix", + } if nprocesses is None: if not disable_progressbar: @@ -741,13 +946,12 @@ def compute_availabilitymatrix( assert excluder.all_closed, ( "For parallelization all raster files in excluder must be closed" ) - kwargs = { - "initializer": _init_process, - "initargs": (shapes, *args), - "maxtasksperchild": 20, - "processes": nprocesses, - } - with mp.get_context("spawn").Pool(**kwargs) as pool: + with mp.get_context("spawn").Pool( + processes=nprocesses, + initializer=_init_process, + initargs=(shapes, *args), + maxtasksperchild=20, + ) as pool: if disable_progressbar: availability = list(pool.map(_process_func, shapes.index)) else: @@ -755,16 +959,32 @@ def compute_availabilitymatrix( tqdm(pool.imap(_process_func, shapes.index), **tqdm_kwargs) ) - availability = np.stack(availability)[:, ::-1] # flip axis, see Notes - if availability.ndim == 4: - availability = availability.squeeze(axis=1) + availability_arr = np.stack(availability)[:, ::-1] # flip axis, see Notes + if availability_arr.ndim == 4: + availability_arr = availability_arr.squeeze(axis=1) coords = [(shapes.index), ("y", cutout.data.y.data), ("x", cutout.data.x.data)] - return xr.DataArray(availability, coords=coords) + return xr.DataArray(availability_arr, coords=coords) -def maybe_swap_spatial_dims(ds, namex="x", namey="y"): +def maybe_swap_spatial_dims( + ds: xr.Dataset | xr.DataArray, namex: str = "x", namey: str = "y" +) -> xr.Dataset | xr.DataArray: """ - Swap order of spatial dimensions according to atlite concention. + Ensure spatial coordinates follow atlite's axis ordering. + + Parameters + ---------- + ds : xarray.Dataset or xarray.DataArray + Object with spatial coordinates. + namex : str, optional + Name of the x dimension. + namey : str, optional + Name of the y dimension. + + Returns + ------- + xarray.Dataset or xarray.DataArray + Input object with spatial dimensions reversed if needed. """ swaps = {} lx, rx = ds.indexes[namex][[0, -1]] @@ -775,10 +995,10 @@ def maybe_swap_spatial_dims(ds, namex="x", namey="y"): if uy < ly: swaps[namey] = slice(None, None, -1) - return ds.isel(**swaps) if swaps else ds + return ds.isel(swaps) if swaps else ds -def _as_transform(x, y): +def _as_transform(x: pd.Index, y: pd.Index) -> rio.Affine: lx, rx = x[[0, -1]] ly, uy = y[[0, -1]] @@ -788,28 +1008,35 @@ def _as_transform(x, y): return rio.Affine(dx, 0, lx - dx / 2, 0, dy, ly - dy / 2) -def regrid(ds, dimx, dimy, **kwargs): +def regrid( + ds: xr.Dataset | xr.DataArray, + dimx: pd.Index, + dimy: pd.Index, + **kwargs: Any, +) -> xr.Dataset | xr.DataArray: """ - Interpolate Dataset or DataArray `ds` to a new grid, using rasterio's - reproject facility. + Interpolate `ds` to a new spatial grid using rasterio's reproject. See also: https://mapbox.github.io/rasterio/topics/resampling.html Parameters ---------- - ds : xr.Dataset|xr.DataArray - N-dim data on a spatial grid - dimx : pd.Index - New x-coordinates in destination crs. - dimx.name MUST refer to x-coord of ds. - dimy : pd.Index - New y-coordinates in destination crs. - dimy.name MUST refer to y-coord of ds. - **kwargs : - Arguments passed to rio.wrap.reproject; of note: - - resampling is one of gis.Resampling.{average,cubic,bilinear,nearest} - - src_crs, dst_crs define the different crs (default: EPSG 4326, ie latlong) + ds : xarray.Dataset or xarray.DataArray + Data on a spatial grid. + dimx : pandas.Index + Target x coordinates. ``dimx.name`` must match the source x dimension. + dimy : pandas.Index + Target y coordinates. ``dimy.name`` must match the source y dimension. + **kwargs + Keyword arguments passed to ``rasterio.warp.reproject``; of note: + ``resampling`` is one of + ``gis.Resampling.{average,cubic,bilinear,nearest}``; ``src_crs`` and + ``dst_crs`` define the source/target CRS (default: EPSG 4326, latlong). + Returns + ------- + xarray.Dataset or xarray.DataArray + Regridded object on the target coordinates. """ namex = dimx.name namey = dimy.name @@ -824,7 +1051,7 @@ def regrid(ds, dimx, dimy, **kwargs): kwargs.setdefault("src_crs", CRS.from_epsg(4326)) kwargs.setdefault("dst_crs", CRS.from_epsg(4326)) - def _reproject(src, **kwargs): + def _reproject(src: NDArray, **kwargs: Any) -> NDArray: shape = src.shape[:-2] + dst_shape src, trans = pad_extent( src, @@ -841,31 +1068,26 @@ def _reproject(src, **kwargs): if reprojected.ndim != src.ndim: reprojected = reprojected.squeeze(axis=0) - return reprojected + return cast("NDArray", reprojected) data_vars = ds.data_vars.values() if isinstance(ds, xr.Dataset) else (ds,) dtypes = {da.dtype for da in data_vars} assert len(dtypes) == 1, "regrid can only reproject datasets with homogeneous dtype" - return ( - xr.apply_ufunc( - _reproject, - ds, - input_core_dims=[[namey, namex]], - output_core_dims=[["yout", "xout"]], - output_dtypes=[dtypes.pop()], - dask_gufunc_kwargs=dict( - output_sizes={"yout": dst_shape[0], "xout": dst_shape[1]} - ), - dask="parallelized", - kwargs=kwargs, - ) - .rename({"yout": namey, "xout": namex}) - .assign_coords( - **{ - namey: (namey, dimy.data, ds.coords[namey].attrs), - namex: (namex, dimx.data, ds.coords[namex].attrs), - } - ) - .assign_attrs(**ds.attrs) + result = xr.apply_ufunc( + _reproject, + ds, + input_core_dims=[[namey, namex]], + output_core_dims=[["yout", "xout"]], + output_dtypes=[dtypes.pop()], + dask_gufunc_kwargs={ + "output_sizes": {"yout": dst_shape[0], "xout": dst_shape[1]} + }, + dask="parallelized", + kwargs=kwargs, ) + result = result.rename({"yout": namey, "xout": namex}).assign_coords({ + namey: (namey, dimy.data, ds.coords[namey].attrs), + namex: (namex, dimx.data, ds.coords[namex].attrs), + }) + return cast("xr.Dataset | xr.DataArray", result.assign_attrs(**ds.attrs)) diff --git a/atlite/hydro.py b/atlite/hydro.py index df08aacc..e449d1cf 100644 --- a/atlite/hydro.py +++ b/atlite/hydro.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module involving hydro operations in atlite. -""" +"""Module involving hydro operations in atlite.""" + +from __future__ import annotations import logging from collections import namedtuple @@ -20,17 +20,58 @@ Basins = namedtuple("Basins", ["plants", "meta", "shapes"]) -def find_basin(shapes, lon, lat): +def find_basin( + shapes: gpd.GeoSeries, + lon: float, + lat: float, +) -> int: + """ + Find the basin containing a point. + + Parameters + ---------- + shapes : geopandas.GeoSeries + Basin geometries indexed by basin id. + lon : float + Longitude of the point. + lat : float + Latitude of the point. + + Returns + ------- + int + Basin id containing the point. + """ hids = shapes.index[shapes.intersects(Point(lon, lat))] if len(hids) > 1: logger.warning( - f"The point ({lon}, {lat}) is in several basins: {hids}. " - "Assuming the first one." + "The point (%s, %s) is in several basins: %s. Assuming the first one.", + lon, + lat, + hids, ) - return hids[0] - - -def find_upstream_basins(meta, hid): + return int(hids[0]) + + +def find_upstream_basins( + meta: pd.DataFrame, + hid: int, +) -> list[int]: + """ + Collect all upstream basins of a basin. + + Parameters + ---------- + meta : pandas.DataFrame + Basin metadata with a ``NEXT_DOWN`` column. + hid : int + Basin id from which to start. + + Returns + ------- + list[int] + Basin ids including the selected basin and all upstream basins. + """ hids = [hid] i = 0 while i < len(hids): @@ -39,7 +80,28 @@ def find_upstream_basins(meta, hid): return hids -def determine_basins(plants, hydrobasins, show_progress=False): +def determine_basins( + plants: pd.DataFrame, + hydrobasins: str | gpd.GeoDataFrame, + show_progress: bool = False, +) -> Basins: + """ + Determine local and upstream basins for hydro plants. + + Parameters + ---------- + plants : pandas.DataFrame + Plant table with ``lon`` and ``lat`` columns. + hydrobasins : str or geopandas.GeoDataFrame + HydroBASINS data source or loaded basin geometries. + show_progress : bool, default False + Whether to show a progress bar. + + Returns + ------- + Basins + Basin assignments, metadata, and geometries for the plants. + """ if isinstance(hydrobasins, str): hydrobasins = gpd.read_file(hydrobasins) @@ -48,9 +110,12 @@ def determine_basins(plants, hydrobasins, show_progress=False): f"but received `type(hydrobasins) = {type(hydrobasins)}`" ) - missing_columns = pd.Index( - ["HYBAS_ID", "DIST_MAIN", "NEXT_DOWN", "geometry"] - ).difference(hydrobasins.columns) + missing_columns = pd.Index([ + "HYBAS_ID", + "DIST_MAIN", + "NEXT_DOWN", + "geometry", + ]).difference(hydrobasins.columns) assert missing_columns.empty, ( "Couldn't find the column(s) {} in the hydrobasins dataset.".format( ", ".join(missing_columns) @@ -62,7 +127,7 @@ def determine_basins(plants, hydrobasins, show_progress=False): meta = hydrobasins[hydrobasins.columns.difference(("geometry",))] shapes = hydrobasins["geometry"] - plant_basins = [] + plant_basins: list[tuple[int, list[int]]] = [] for p in tqdm( plants.itertuples(), disable=not show_progress, @@ -70,18 +135,40 @@ def determine_basins(plants, hydrobasins, show_progress=False): ): hid = find_basin(shapes, p.lon, p.lat) plant_basins.append((hid, find_upstream_basins(meta, hid))) - plant_basins = pd.DataFrame( + plant_basins_df = pd.DataFrame( plant_basins, columns=["hid", "upstream"], index=plants.index ) - unique_basins = pd.Index(plant_basins["upstream"].sum()).unique().rename("hid") - return Basins(plant_basins, meta.loc[unique_basins], shapes.loc[unique_basins]) + unique_basins = pd.Index(plant_basins_df["upstream"].sum()).unique().rename("hid") + return Basins(plant_basins_df, meta.loc[unique_basins], shapes.loc[unique_basins]) def shift_and_aggregate_runoff_for_plants( - basins, runoff, flowspeed=1, show_progress=False -): - inflow = xr.DataArray( + basins: Basins, + runoff: xr.DataArray, + flowspeed: float = 1, + show_progress: bool = False, +) -> xr.DataArray: + """ + Shift basin runoff in time and aggregate it per plant. + + Parameters + ---------- + basins : Basins + Basin mappings and metadata for the plants. + runoff : xarray.DataArray + Runoff time series indexed by ``hid`` and ``time``. + flowspeed : float, default 1 + Flow speed in m/s used to convert distance to travel time. + show_progress : bool, default False + Whether to show a progress bar. + + Returns + ------- + xarray.DataArray + Plant inflow time series indexed by ``plant`` and ``time``. + """ + inflow: xr.DataArray = xr.DataArray( np.zeros((len(basins.plants), runoff.indexes["time"].size)), [("plant", basins.plants.index), runoff.coords["time"]], ) @@ -91,12 +178,12 @@ def shift_and_aggregate_runoff_for_plants( disable=not show_progress, desc="Shift and aggregate runoff by plant", ): - inflow_plant = inflow.loc[dict(plant=ppl.Index)] - distances = ( + inflow_plant: xr.DataArray = inflow.loc[{"plant": ppl.Index}] + distances: pd.Series = ( basins.meta.loc[ppl.upstream, "DIST_MAIN"] - basins.meta.at[ppl.hid, "DIST_MAIN"] ) - nhours = (distances / (flowspeed * 3.6) + 0.5).astype(int) + nhours: pd.Series = (distances / (flowspeed * 3.6) + 0.5).astype(int) for b in ppl.upstream: inflow_plant += runoff.sel(hid=b).roll(time=nhours.at[b]) diff --git a/atlite/pv/__init__.py b/atlite/pv/__init__.py index c5528ebf..e05416cb 100644 --- a/atlite/pv/__init__.py +++ b/atlite/pv/__init__.py @@ -1,6 +1,38 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -atlite pv module. -""" +"""Photovoltaic modeling functions.""" + +from __future__ import annotations + +from atlite.pv.irradiation import ( + DiffuseHorizontalIrrad, + TiltedDiffuseIrrad, + TiltedDirectIrrad, + TiltedGroundIrrad, + TiltedIrradiation, +) +from atlite.pv.orientation import ( + SurfaceOrientation, + get_orientation, + make_constant, + make_latitude, + make_latitude_optimal, +) +from atlite.pv.solar_panel_model import SolarPanelModel +from atlite.pv.solar_position import SolarPosition + +__all__: list[str] = [ + "DiffuseHorizontalIrrad", + "TiltedDirectIrrad", + "TiltedDiffuseIrrad", + "TiltedGroundIrrad", + "TiltedIrradiation", + "SurfaceOrientation", + "get_orientation", + "make_constant", + "make_latitude", + "make_latitude_optimal", + "SolarPanelModel", + "SolarPosition", +] diff --git a/atlite/pv/irradiation.py b/atlite/pv/irradiation.py index 8275d58f..3cf9f526 100644 --- a/atlite/pv/irradiation.py +++ b/atlite/pv/irradiation.py @@ -2,20 +2,66 @@ # # SPDX-License-Identifier: MIT +"""Solar irradiation decomposition and transposition models.""" + +from __future__ import annotations + import logging +from typing import TYPE_CHECKING import numpy as np from dask.array import cos, fmax, fmin, radians, sin, sqrt +if TYPE_CHECKING: + import xarray as xr + + from atlite._types import ( + ClearskyModel, + IrradiationType, + TrackingType, + TrigonModel, + ) + logger = logging.getLogger(__name__) -def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): - # Clearsky model from Reindl 1990 to split downward radiation into direct - # and diffuse contributions. Should switch to more up-to-date model, f.ex. - # Ridley et al. (2010) http://dx.doi.org/10.1016/j.renene.2009.07.018 , - # Lauret et al. (2013):http://dx.doi.org/10.1016/j.renene.2012.01.049 +def DiffuseHorizontalIrrad( + ds: xr.Dataset, + solar_position: xr.Dataset, + clearsky_model: ClearskyModel | None, + influx: xr.DataArray, +) -> xr.DataArray: + """ + Estimate diffuse horizontal irradiation from total horizontal irradiation. + + Clearsky model from Reindl 1990 to split downward radiation into direct + and diffuse contributions. Should switch to more up-to-date model, e.g. + Ridley et al. (2010) http://dx.doi.org/10.1016/j.renene.2009.07.018 , + Lauret et al. (2013): http://dx.doi.org/10.1016/j.renene.2012.01.049 + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing top-of-atmosphere irradiation and, for the enhanced + model, temperature and humidity. + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + clearsky_model : str or None + Reindl clearsky model to use, either ``"simple"`` or ``"enhanced"``. + If None, the model is chosen from the available data. + influx : xarray.DataArray + Total horizontal irradiation. + + Returns + ------- + xarray.DataArray + Diffuse horizontal irradiation. + Raises + ------ + KeyError + If ``clearsky_model`` is not ``'simple'`` or ``'enhanced'``. + """ sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -25,14 +71,9 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): ) # Reindl 1990 clearsky model - k = influx / influx_toa # clearsky index - # k.values[k.values > 1.0] = 1.0 - # k = k.rename('clearsky index') if clearsky_model == "simple": - # Simple Reindl model without ambient air temperature and - # relative humidity fraction = ( ((k > 0.0) & (k <= 0.3)) * fmin(1.0, 1.020 - 0.254 * k + 0.0123 * sinaltitude) @@ -41,8 +82,6 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): + (k >= 0.78) * fmax(0.1, 0.486 * k - 0.182 * sinaltitude) ) elif clearsky_model == "enhanced": - # Enhanced Reindl model with ambient air temperature and relative - # humidity T = ds["temperature"] rh = ds["humidity"] @@ -66,16 +105,38 @@ def DiffuseHorizontalIrrad(ds, solar_position, clearsky_model, influx): else: raise KeyError("`clearsky model` must be chosen from 'simple' and 'enhanced'") - # Set diffuse fraction to one when the sun isn't up - # fraction = fraction.where(sinaltitude >= sin(radians(threshold))).fillna(1.0) - # fraction = fraction.rename('fraction index') + result: xr.DataArray = (influx * fraction).rename("diffuse horizontal") + return result - return (influx * fraction).rename("diffuse horizontal") +def TiltedDiffuseIrrad( + ds: xr.Dataset, + solar_position: xr.Dataset, + surface_orientation: xr.Dataset, + direct: xr.DataArray, + diffuse: xr.DataArray, +) -> xr.DataArray: + """ + Calculate diffuse irradiation on a tilted surface (Hay-Davies model). -def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse): - # Hay-Davies Model + Parameters + ---------- + ds : xarray.Dataset + Dataset containing top-of-atmosphere irradiation. + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + surface_orientation : xarray.Dataset + Surface orientation including ``cosincidence`` and ``slope``. + direct : xarray.DataArray + Direct horizontal irradiation. + diffuse : xarray.DataArray + Diffuse horizontal irradiation. + Returns + ------- + xarray.DataArray + Diffuse tilted irradiation. + """ sinaltitude = sin(solar_position["altitude"]) influx_toa = ds["influx_toa"] @@ -85,13 +146,9 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) influx = direct + diffuse with np.errstate(divide="ignore", invalid="ignore"): - # brightening factor - f = sqrt(direct / influx).fillna(0.0) + f = sqrt(direct / influx).fillna(0.0) # brightening factor + A = direct / influx_toa # anisotropy factor - # anisotropy factor - A = direct / influx_toa - - # geometric factor R_b = cosincidence / sinaltitude diffuse_t = ( @@ -101,60 +158,122 @@ def TiltedDiffuseIrrad(ds, solar_position, surface_orientation, direct, diffuse) + A * R_b ) * diffuse + if ( + logger.isEnabledFor(logging.WARNING) + and ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any() + ): + logger.warning("diffuse_t exhibits negative values above altitude threshold.") + # fixup: clip all negative values (unclear why it gets negative) # note: REatlas does not do the fixup - if logger.isEnabledFor(logging.WARNING): - if ((diffuse_t < 0.0) & (sinaltitude > sin(radians(1.0)))).any(): - logger.warning( - "diffuse_t exhibits negative values above altitude threshold." - ) - with np.errstate(invalid="ignore"): diffuse_t = diffuse_t.clip(min=0).fillna(0) - return diffuse_t.rename("diffuse tilted") + result: xr.DataArray = diffuse_t.rename("diffuse tilted") + return result + + +def TiltedDirectIrrad( + solar_position: xr.Dataset, surface_orientation: xr.Dataset, direct: xr.DataArray +) -> xr.DataArray: + """ + Calculate direct irradiation on a tilted surface. + Parameters + ---------- + solar_position : xarray.Dataset + Solar position with an ``altitude`` variable in radians. + surface_orientation : xarray.Dataset + Surface orientation including ``cosincidence``. + direct : xarray.DataArray + Direct horizontal irradiation. -def TiltedDirectIrrad(solar_position, surface_orientation, direct): + Returns + ------- + xarray.DataArray + Direct tilted irradiation. + """ sinaltitude = sin(solar_position["altitude"]) cosincidence = surface_orientation["cosincidence"] - # geometric factor R_b = cosincidence / sinaltitude - return (R_b * direct).rename("direct tilted") + result: xr.DataArray = (R_b * direct).rename("direct tilted") + return result -def _albedo(ds, influx): - if "albedo" in ds: - albedo = ds["albedo"] - elif "outflux" in ds: - albedo = (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) - else: - raise AssertionError( - "Need either albedo or outflux as a variable in the dataset. " - "Check your cutout and dataset module." - ) +def _albedo(ds: xr.Dataset, influx: xr.DataArray) -> xr.DataArray: + """ + Retrieve or derive surface albedo from the dataset. - return albedo + Parameters + ---------- + ds : xarray.Dataset + Dataset containing either ``albedo`` or ``outflux``. + influx : xarray.DataArray + Downward surface irradiation used when deriving albedo from outflux. + Returns + ------- + xarray.DataArray + Surface albedo. -def TiltedGroundIrrad(ds, solar_position, surface_orientation, influx): + Raises + ------ + AssertionError + If the dataset lacks both ``albedo`` and ``outflux`` variables. + """ + if "albedo" in ds: + return ds["albedo"] + if "outflux" in ds: + return (ds["outflux"] / influx.where(influx != 0)).fillna(0).clip(max=1) + raise AssertionError( + "Need either albedo or outflux as a variable in the dataset. " + "Check your cutout and dataset module." + ) + + +def TiltedGroundIrrad( + ds: xr.Dataset, + solar_position: xr.Dataset, + surface_orientation: xr.Dataset, + influx: xr.DataArray, +) -> xr.DataArray: + """ + Calculate ground-reflected irradiation on a tilted surface. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing albedo information or reflected outflux. + solar_position : xarray.Dataset + Solar position dataset. + surface_orientation : xarray.Dataset + Surface orientation including ``slope``. + influx : xarray.DataArray + Total horizontal irradiation. + + Returns + ------- + xarray.DataArray + Ground-reflected tilted irradiation. + """ surface_slope = surface_orientation["slope"] ground_t = influx * _albedo(ds, influx) * (1.0 - cos(surface_slope)) / 2.0 - return ground_t.rename("ground tilted") + result: xr.DataArray = ground_t.rename("ground tilted") + return result def TiltedIrradiation( - ds, - solar_position, - surface_orientation, - trigon_model, - clearsky_model, - tracking=0, - altitude_threshold=1.0, - irradiation="total", -): + ds: xr.Dataset, + solar_position: xr.Dataset, + surface_orientation: xr.Dataset, + trigon_model: TrigonModel, + clearsky_model: ClearskyModel | None, + tracking: TrackingType | int | None = 0, + altitude_threshold: float = 1.0, + irradiation: IrradiationType = "total", +) -> xr.DataArray: """ Calculate the irradiation on a tilted surface. @@ -168,7 +287,8 @@ def TiltedIrradiation( surface_orientation : xarray.Dataset Surface orientation calculated using atlite.orientation.SurfaceOrientation. trigon_model : str - Type of trigonometry model. Defaults to 'simple'if used via `convert_irradiation`. + Type of trigonometry model. Defaults to 'simple' if used via + `convert_irradiation`. clearsky_model : str or None Either the 'simple' or the 'enhanced' Reindl clearsky model. The default choice of None will choose dependending on @@ -176,6 +296,8 @@ def TiltedIrradiation( incorporates ambient air temperature and relative humidity. NOTE: this option is only used if the used climate dataset doesn't provide direct and diffuse irradiation separately! + tracking : int or str, default 0 + Type of solar tracking. 0 for fixed, other values for tracking modes. altitude_threshold : float Threshold for solar altitude in degrees. Values in range (0, altitude_threshold] will be set to zero. Default value equals 1.0 degrees. @@ -192,10 +314,16 @@ def TiltedIrradiation( result : xarray.DataArray The desired irradiation quantity on the tilted surface. + Raises + ------ + AssertionError + If the dataset lacks required irradiation variables. + ValueError + If ``irradiation`` is not a recognized type. """ influx_toa = ds["influx_toa"] - def clip(influx, influx_max): + def clip(influx: xr.DataArray, influx_max: xr.DataArray) -> xr.DataArray: # use .data in clip due to dask-xarray incompatibilities return influx.clip(min=0, max=influx_max.transpose(*influx.dims).data) @@ -211,6 +339,7 @@ def clip(influx, influx_max): "Need either influx or influx_direct and influx_diffuse in the " "dataset. Check your cutout and dataset module." ) + if trigon_model == "simple": k = surface_orientation["cosincidence"] / sin(solar_position["altitude"]) if tracking != "dual": @@ -235,6 +364,7 @@ def clip(influx, influx_max): total_t = direct_t + diffuse_t + ground_t + result: xr.DataArray if irradiation == "total": result = total_t.rename("total tilted") elif irradiation == "direct": @@ -243,11 +373,13 @@ def clip(influx, influx_max): result = diffuse_t.rename("diffuse tilted") elif irradiation == "ground": result = ground_t.rename("ground tilted") + else: + msg = f"Unknown irradiation type: {irradiation}" + raise ValueError(msg) # The solar_position algorithms have a high error for small solar altitude # values, leading to big overall errors from the 1/sinaltitude factor. - # => Suppress irradiation below solar altitudes of 1 deg. - + # => Suppress irradiation below altitude_threshold. cap_alt = solar_position["altitude"] < radians(altitude_threshold) result = result.where(~(cap_alt | (direct + diffuse <= 0.01)), 0) result.attrs["units"] = "W m**-2" diff --git a/atlite/pv/orientation.py b/atlite/pv/orientation.py index d5240546..ced88ab1 100644 --- a/atlite/pv/orientation.py +++ b/atlite/pv/orientation.py @@ -2,31 +2,60 @@ # # SPDX-License-Identifier: MIT +"""Panel orientation and tilt angle utilities.""" + +from __future__ import annotations + import sys +from typing import TYPE_CHECKING, Any import numpy as np import xarray as xr from dask.array import arccos, arcsin, arctan, cos, logical_and, radians, sin from numpy import pi +if TYPE_CHECKING: + from collections.abc import Callable + + from atlite._types import OrientationName, TrackingType + -def get_orientation(name, **params): +def get_orientation( + name: OrientationName | dict[str, Any], **params: Any +) -> Callable[[xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray]]: """ - Definitions: - -`slope` is the angle between ground and panel. - -`azimuth` is the clockwise angle from North. - i.e. azimuth = 180 faces exactly South + Return an orientation factory by name. + + Conventions: + - ``slope`` is the angle between ground and panel. + - ``azimuth`` is the clockwise angle from North (i.e. azimuth=180 faces South). + + Parameters + ---------- + name : str or dict + Orientation name or parameter dictionary containing ``name``. + **params + Parameters passed to the orientation factory. + + Returns + ------- + callable + Orientation function returning ``slope`` and ``azimuth``. """ if isinstance(name, dict): params = name name = params.pop("name", "constant") - return getattr(sys.modules[__name__], f"make_{name}")(**params) + result: Callable[ + [xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray] + ] = getattr(sys.modules[__name__], f"make_{name}")(**params) + return result -def make_latitude_optimal(): +def make_latitude_optimal() -> Callable[ + [xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray] +]: """ - Returns an optimal tilt angle for the given ``lat``, assuming that the - panel is facing towards the equator, using a simple method from [1]. + Return an optimal tilt angle assuming the panel faces the equator. This method only works for latitudes between 0 and 50. For higher latitudes, a static 40 degree angle is returned. @@ -40,14 +69,32 @@ def make_latitude_optimal(): [2] http://dx.doi.org/10.1016/j.solener.2010.12.014 [3] https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py - Parameters - ---------- - lat : float - Latitude in degrees. - + Returns + ------- + callable + Orientation function returning latitude-optimal ``slope`` and ``azimuth``. """ - def latitude_optimal(lon, lat, solar_position): + def latitude_optimal( + lon: xr.DataArray, lat: xr.DataArray, solar_position: xr.Dataset + ) -> dict[str, xr.DataArray]: + """ + Build an orientation with latitude-dependent optimal tilt. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with ``slope`` and ``azimuth``. + """ slope = np.empty_like(lat.values) below_25 = np.abs(lat.values) <= np.radians(25) @@ -61,37 +108,141 @@ def latitude_optimal(lon, lat, solar_position): # South orientation for panels on northern hemisphere and vice versa azimuth = np.where(lat.values < 0, 0, pi) - return dict( - slope=xr.DataArray(slope, coords=lat.coords), - azimuth=xr.DataArray(azimuth, coords=lat.coords), - ) + return { + "slope": xr.DataArray(slope, coords=lat.coords), + "azimuth": xr.DataArray(azimuth, coords=lat.coords), + } return latitude_optimal -def make_constant(slope, azimuth): - slope = radians(slope) - azimuth = radians(azimuth) +def make_constant( + slope: float, azimuth: float +) -> Callable[[xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray]]: + """ + Create an orientation function with constant slope and azimuth. - def constant(lon, lat, solar_position): - return dict(slope=slope, azimuth=azimuth) + Parameters + ---------- + slope : float + Surface slope in degrees. + azimuth : float + Surface azimuth in degrees clockwise from north. + + Returns + ------- + callable + Orientation function returning constant ``slope`` and ``azimuth``. + """ + slope_rad = radians(slope) + azimuth_rad = radians(azimuth) + + def constant( + lon: xr.DataArray, lat: xr.DataArray, solar_position: xr.Dataset + ) -> dict[str, xr.DataArray]: + """ + Return the configured constant panel orientation. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with constant ``slope`` and ``azimuth``. + """ + return {"slope": slope_rad, "azimuth": azimuth_rad} return constant -def make_latitude(azimuth=180): - azimuth = radians(azimuth) +def make_latitude( + azimuth: float = 180, +) -> Callable[[xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray]]: + """ + Create an orientation function with slope equal to latitude. + + Parameters + ---------- + azimuth : float, default 180 + Surface azimuth in degrees clockwise from north. - def latitude(lon, lat, solar_position): - return dict(slope=lat, azimuth=azimuth) + Returns + ------- + callable + Orientation function using latitude as slope. + """ + azimuth_rad = radians(azimuth) + + def latitude( + lon: xr.DataArray, lat: xr.DataArray, solar_position: xr.Dataset + ) -> dict[str, xr.DataArray]: + """ + Return an orientation with slope equal to latitude. + + Parameters + ---------- + lon : xarray.DataArray + Longitudes in radians. + lat : xarray.DataArray + Latitudes in radians. + solar_position : xarray.Dataset + Solar position dataset. + + Returns + ------- + dict + Mapping with latitude-based ``slope`` and constant ``azimuth``. + """ + return {"slope": lat, "azimuth": azimuth_rad} return latitude -def SurfaceOrientation(ds, solar_position, orientation, tracking=None): +def SurfaceOrientation( + ds: xr.Dataset, + solar_position: xr.Dataset, + orientation: Callable[ + [xr.DataArray, xr.DataArray, xr.Dataset], dict[str, xr.DataArray] + ], + tracking: TrackingType | None = None, +) -> xr.Dataset: """ Compute cos(incidence) for slope and panel azimuth. + Parameters + ---------- + ds : xarray.Dataset + Weather dataset containing ``lon`` and ``lat`` coordinates in degrees. + solar_position : xarray.Dataset + Dataset with solar position variables ``altitude`` and ``azimuth`` + in radians. + orientation : callable + Function returning a dict with ``slope`` and ``azimuth`` (in radians) + given ``(lon, lat, solar_position)``. Typically produced by + :func:`get_orientation`. + tracking : {None, 'horizontal', 'tilted_horizontal', 'vertical', 'dual'}, optional + Tracking type. ``None`` for fixed panels, ``'horizontal'`` for 1-axis + horizontal tracking, ``'tilted_horizontal'`` for 1-axis horizontal + tracking of a tilted panel, ``'vertical'`` for 1-axis vertical + tracking, or ``'dual'`` for 2-axis tracking. + + Returns + ------- + xarray.Dataset + Dataset with ``cosincidence``, ``slope``, and ``azimuth``. + + Raises + ------ + AssertionError + If ``tracking`` is not a recognized tracking type. + References ---------- [1] Sproul, A. B., Derivation of the solar geometric relationships using @@ -104,9 +255,9 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): lon = radians(ds["lon"]) lat = radians(ds["lat"]) - orientation = orientation(lon, lat, solar_position) - surface_slope = orientation["slope"] - surface_azimuth = orientation["azimuth"] + orientation_dict = orientation(lon, lat, solar_position) + surface_slope = orientation_dict["slope"] + surface_azimuth = orientation_dict["azimuth"] sun_altitude = solar_position["altitude"] sun_azimuth = solar_position["azimuth"] @@ -117,23 +268,21 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): ) + cos(surface_slope) * sin(sun_altitude) elif tracking == "horizontal": # horizontal tracking with horizontal axis - axis_azimuth = orientation[ - "azimuth" - ] # here orientation['azimuth'] refers to the azimuth of the tracker axis. + # orientation_dict['azimuth'] refers here to the azimuth of the tracker axis + axis_azimuth = orientation_dict["azimuth"] rotation = arctan( (cos(sun_altitude) / sin(sun_altitude)) * sin(sun_azimuth - axis_azimuth) ) surface_slope = abs(rotation) - surface_azimuth = axis_azimuth + arcsin(sin(rotation) / sin(surface_slope)) # the 2nd part yields +/-1 and determines if the panel is facing east or west + surface_azimuth = axis_azimuth + arcsin(sin(rotation) / sin(surface_slope)) cosincidence = cos(surface_slope) * sin(sun_altitude) + sin( surface_slope ) * cos(sun_altitude) * cos(sun_azimuth - surface_azimuth) - elif tracking == "tilted_horizontal": # horizontal tracking with tilted axis' - axis_tilt = orientation[ - "slope" - ] # here orientation['slope'] refers to the tilt of the tracker axis. + elif tracking == "tilted_horizontal": # horizontal tracking with tilted axis + # orientation_dict['slope'] refers here to the tilt of the tracker axis + axis_tilt = orientation_dict["slope"] rotation = arctan( (cos(sun_altitude) * sin(sun_azimuth - surface_azimuth)) @@ -146,10 +295,10 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): surface_slope = arccos(cos(rotation) * cos(axis_tilt)) azimuth_difference = sun_azimuth - surface_azimuth - azimuth_difference = np.where( + azimuth_difference = xr.where( azimuth_difference > pi, azimuth_difference - 2 * pi, azimuth_difference ) - azimuth_difference = np.where( + azimuth_difference = xr.where( azimuth_difference < -pi, 2 * pi + azimuth_difference, azimuth_difference ) rotation = np.where( @@ -168,29 +317,28 @@ def SurfaceOrientation(ds, solar_position, orientation, tracking=None): + cos(axis_tilt) * sin(sun_altitude) ) + sin(rotation) * cos(sun_altitude) * sin(sun_azimuth - surface_azimuth) - elif tracking == "vertical": # vertical tracking, surface azimuth = sun_azimuth + elif tracking == "vertical": cosincidence = sin(surface_slope) * cos(sun_altitude) + cos( surface_slope ) * sin(sun_altitude) - elif tracking == "dual": # both vertical and horizontal tracking + elif tracking == "dual": cosincidence = np.float64(1.0) else: - assert False, ( + msg = ( "Values describing tracking system must be None for no tracking," + "'horizontal' for 1-axis horizontal tracking," - + "tilted_horizontal' for 1-axis horizontal tracking of tilted panle," - + "vertical' for 1-axis vertical tracking, or 'dual' for 2-axis tracking" + + "'tilted_horizontal' for 1-axis horizontal tracking of tilted panel," + + "'vertical' for 1-axis vertical tracking, or 'dual' for 2-axis tracking" ) + raise AssertionError(msg) # fixup incidence angle: if the panel is badly oriented and the sun shines - # on the back of the panel (incidence angle > 90degree), the irradiation - # would be negative instead of 0; this is prevented here. + # on the back of the panel (incidence > 90 deg), the irradiation would be + # negative instead of 0; this is prevented here. cosincidence = cosincidence.clip(min=0) - return xr.Dataset( - { - "cosincidence": cosincidence, - "slope": surface_slope, - "azimuth": surface_azimuth, - } - ) + return xr.Dataset({ + "cosincidence": cosincidence, + "slope": surface_slope, + "azimuth": surface_azimuth, + }) diff --git a/atlite/pv/solar_panel_model.py b/atlite/pv/solar_panel_model.py index e713635d..97bd3cec 100644 --- a/atlite/pv/solar_panel_model.py +++ b/atlite/pv/solar_panel_model.py @@ -2,14 +2,26 @@ # # SPDX-License-Identifier: MIT +"""Solar panel electrical performance models. + +The Huld model was copied from gsee -- global solar energy estimator +by Stefan Pfenninger. +https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + import numpy as np -# Huld model was copied from gsee -- global solar energy estimator -# by Stefan Pfenninger -# https://github.com/renewables-ninja/gsee/blob/master/gsee/pv.py +if TYPE_CHECKING: + import xarray as xr -def _power_huld(irradiance, t_amb, pc): +def _power_huld( + irradiance: xr.DataArray, t_amb: xr.DataArray, pc: dict[str, Any] +) -> xr.DataArray: """ AC power per capacity predicted by Huld model, based on W/m2 irradiance. @@ -18,6 +30,11 @@ def _power_huld(irradiance, t_amb, pc): [1] Huld, T. et al., 2010. Mapping the performance of PV modules, effects of module type and data averaging. Solar Energy, 84(2), p.324-338. DOI: 10.1016/j.solener.2009.12.002 + + Returns + ------- + xr.DataArray + Specific generation in kWh/kWp. """ # normalized module temperature T_ = (pc["c_temp_amb"] * t_amb + pc["c_temp_irrad"] * irradiance) - pc["r_tmod"] @@ -39,21 +56,26 @@ def _power_huld(irradiance, t_amb, pc): da = G_ * eff * pc.get("inverter_efficiency", 1.0) da.attrs["units"] = "kWh/kWp" - da = da.rename("specific generation") + result: xr.DataArray = da.rename("specific generation") + return result - return da - -def _power_bofinger(irradiance, t_amb, pc): +def _power_bofinger( + irradiance: xr.DataArray, t_amb: xr.DataArray, pc: dict[str, Any] +) -> xr.DataArray: """ - AC power per capacity predicted by bofinger model, based on W/m2 - irradiance. + Predict AC power per capacity using the Bofinger model. Maximum power point tracking is assumed. [2] Hans Beyer, Gerd Heilscher and Stefan Bofinger, 2004. A robust model for the MPP performance of different types of PV-modules applied for the performance check of grid connected systems. + + Returns + ------- + xr.DataArray + Specific generation in kWh/kWp. """ fraction = (pc["NOCT"] - pc["Tamb"]) / pc["Intc"] @@ -71,15 +93,39 @@ def _power_bofinger(irradiance, t_amb, pc): capacity = (pc["A"] + pc["B"] * 1000.0 + pc["C"] * np.log(1000.0)) * 1e3 power = irradiance * eta * (pc.get("inverter_efficiency", 1.0) / capacity) power = power.where(irradiance >= pc["threshold"], 0) - return power.rename("AC power") + result: xr.DataArray = power.rename("AC power") + return result -def SolarPanelModel(ds, irradiance, pc): - model = pc.get("model", "huld") +def SolarPanelModel( + ds: xr.Dataset, irradiance: xr.DataArray, pc: dict[str, Any] +) -> xr.DataArray: + """ + Compute PV power output for the selected panel model. + + Parameters + ---------- + ds : xarray.Dataset + Dataset containing ambient temperature. + irradiance : xarray.xr.DataArray + Plane-of-array irradiation. + pc : dict + Panel configuration including the model parameters. + + Returns + ------- + xarray.xr.DataArray + Specific PV power output. + + Raises + ------ + AssertionError + If the panel model is unknown. + """ + model: Literal["huld", "bofinger"] = pc.get("model", "huld") if model == "huld": return _power_huld(irradiance, ds["temperature"], pc) - elif model == "bofinger": + if model == "bofinger": return _power_bofinger(irradiance, ds["temperature"], pc) - else: - AssertionError(f"Unknown panel model: {model}") + raise AssertionError(f"Unknown panel model: {model}") diff --git a/atlite/pv/solar_position.py b/atlite/pv/solar_position.py index 494cd433..5267739f 100644 --- a/atlite/pv/solar_position.py +++ b/atlite/pv/solar_position.py @@ -2,6 +2,10 @@ # # SPDX-License-Identifier: MIT +"""Solar position calculation utilities.""" + +from __future__ import annotations + from warnings import warn import pandas as pd @@ -10,7 +14,7 @@ from numpy import pi -def SolarPosition(ds, time_shift="0H"): +def SolarPosition(ds: xr.Dataset, time_shift: str | pd.Timedelta = "0H") -> xr.Dataset: """ Compute solar azimuth and altitude. @@ -27,9 +31,14 @@ def SolarPosition(ds, time_shift="0H"): instantenous data (e.g. SARAH). Must be parseable by pandas.to_timedelta(). Default: "0H" + Returns + ------- + xarray.Dataset + Dataset with ``altitude`` and ``azimuth`` in radians. + References ---------- - [1] Michalsky, J. J., The astronomical almanac’s algorithm for approximate + [1] Michalsky, J. J., The astronomical almanac's algorithm for approximate solar position (1950–2050), Solar Energy, 40(3), 227–235 (1988). [2] Sproul, A. B., Derivation of the solar geometric relationships using vector analysis, Renewable Energy, 32(7), 1187–1205 (2007). @@ -48,7 +57,6 @@ def SolarPosition(ds, time_shift="0H"): The unfortunately quite computationally intensive SPA algorithm [4,5] has been implemented using numba or plain numpy for a single location at https://github.com/pvlib/pvlib-python/blob/master/pvlib/spa.py. - """ # Act like a getter if these return variables are already in ds rvs = { @@ -60,10 +68,14 @@ def SolarPosition(ds, time_shift="0H"): return ds[rvs].rename({v: v.replace("solar_", "") for v in rvs}) warn( - """The calculation method and handling of solar position variables will change. - The solar position will in the future be a permanent variables of a cutout. - Recreate your cutout to remove this warning and permanently include the solar position variables into your cutout.""", + ( + "The calculation method and handling of solar position variables will " + "change. The solar position will in the future be a permanent variable of " + "a cutout. Recreate your cutout to remove this warning and permanently " + "include the solar position variables into your cutout." + ), DeprecationWarning, + stacklevel=2, ) # up to h and dec from [1] @@ -77,11 +89,10 @@ def SolarPosition(ds, time_shift="0H"): # Operations make new DataArray eager; reconvert to lazy dask arrays chunks = ds.chunksizes.get("time", "auto") - if isinstance(chunks, tuple): - chunks = chunks[0] - n = n.chunk(chunks) - hour = hour.chunk(chunks) - minute = minute.chunk(chunks) + chunk_size = chunks[0] if isinstance(chunks, tuple) else chunks + n = n.chunk(chunk_size) + hour = hour.chunk(chunk_size) + minute = minute.chunk(chunk_size) L = 280.460 + 0.9856474 * n # mean longitude (deg) g = radians(357.528 + 0.9856003 * n) # mean anomaly (rad) @@ -116,6 +127,4 @@ def SolarPosition(ds, time_shift="0H"): az.attrs["units"] = "rad" vars = {da.name: da for da in [alt, az]} - solar_position = xr.Dataset(vars) - - return solar_position + return xr.Dataset(vars) diff --git a/atlite/resource.py b/atlite/resource.py index 0cc0443a..a8258c56 100644 --- a/atlite/resource.py +++ b/atlite/resource.py @@ -1,10 +1,7 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Module for providing access to external ressources, like windturbine or pv -panel configurations. -""" +"""Module for accessing external resources like wind turbine and PV panel configurations.""" from __future__ import annotations @@ -13,7 +10,7 @@ import re from operator import itemgetter from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd @@ -26,7 +23,6 @@ logger = logging.getLogger(name=__name__) - RESOURCE_DIRECTORY = Path(__file__).parent / "resources" WINDTURBINE_DIRECTORY = RESOURCE_DIRECTORY / "windturbine" SOLARPANEL_DIRECTORY = RESOURCE_DIRECTORY / "solarpanel" @@ -35,20 +31,45 @@ if TYPE_CHECKING: from typing import TypedDict + import xarray as xr from typing_extensions import NotRequired + from atlite._types import NDArray, PathLike + class TurbineConfig(TypedDict): - V: np.ndarray - POW: np.ndarray + """Wind turbine configuration dictionary.""" + + V: NDArray + POW: NDArray P: float hub_height: float | int name: NotRequired[str] manufacturer: NotRequired[str] source: NotRequired[str] + class PanelConfig(TypedDict): + """Solar panel configuration dictionary.""" + + model: NotRequired[Literal["huld", "bofinger"]] + efficiency: NotRequired[float] + A: NotRequired[float] + B: NotRequired[float] + C: NotRequired[float] + name: NotRequired[str] + source: NotRequired[str] + + class CSPConfig(TypedDict): + """CSP installation configuration dictionary.""" + + efficiency: xr.DataArray + path: PathLike + technology: NotRequired[str] + name: NotRequired[str] + source: NotRequired[str] + def get_windturbineconfig( - turbine: str | Path | dict, add_cutout_windspeed: bool = True + turbine: str | PathLike | dict[str, Any], add_cutout_windspeed: bool = True ) -> TurbineConfig: """ Load the wind 'turbine' configuration. @@ -78,38 +99,47 @@ def get_windturbineconfig( config : dict Config with details on the turbine + Raises + ------ + KeyError + If ``turbine`` is not a str, Path, or dict. + """ - if not isinstance(turbine, (str | Path | dict)): + if not isinstance(turbine, (str, Path, dict)): raise KeyError( f"`turbine` must be a str, pathlib.Path or dict, but is {type(turbine)}." ) if isinstance(turbine, str) and turbine.startswith("oedb:"): - conf = get_oedb_windturbineconfig(turbine[len("oedb:") :]) + conf = cast( + "dict[str, Any]", get_oedb_windturbineconfig(turbine[len("oedb:") :]) + ) - elif isinstance(turbine, (str | Path)): + elif isinstance(turbine, (str, Path)): if isinstance(turbine, str): turbine_path = windturbines[turbine.replace(".yaml", "")] elif isinstance(turbine, Path): turbine_path = turbine - with open(turbine_path) as f: + with Path(turbine_path).open() as f: conf = yaml.safe_load(f) - conf = dict( - V=np.array(conf["V"]), - POW=np.array(conf["POW"]), - hub_height=conf["HUB_HEIGHT"], - P=np.max(conf["POW"]), - ) + conf = { + "V": np.array(conf["V"]), + "POW": np.array(conf["POW"]), + "hub_height": conf["HUB_HEIGHT"], + "P": np.max(conf["POW"]), + } elif isinstance(turbine, dict): conf = turbine - return _validate_turbine_config_dict(conf, add_cutout_windspeed) + return _validate_turbine_config_dict( + cast("dict[str, Any]", conf), add_cutout_windspeed + ) -def get_solarpanelconfig(panel): +def get_solarpanelconfig(panel: str | PathLike) -> PanelConfig: """ Load the 'panel'.yaml file from local disk and provide a solar panel dict. @@ -127,7 +157,7 @@ def get_solarpanelconfig(panel): Config with details on the solarpanel """ - assert isinstance(panel, (str | Path)) + assert isinstance(panel, (str, Path)) if isinstance(panel, str): panel_path = solarpanels[panel.replace(".yaml", "")] @@ -135,16 +165,13 @@ def get_solarpanelconfig(panel): elif isinstance(panel, Path): panel_path = panel - with open(panel_path) as f: - conf = yaml.safe_load(f) + with Path(panel_path).open() as f: + return cast("PanelConfig", yaml.safe_load(f)) - return conf - -def get_cspinstallationconfig(installation): +def get_cspinstallationconfig(installation: str | PathLike) -> CSPConfig: """ - Load the 'installation'.yaml file from local disk to provide the system - efficiencies. + Load a CSP installation configuration from a YAML file. Parameters ---------- @@ -160,7 +187,7 @@ def get_cspinstallationconfig(installation): Config with details on the CSP installation. """ - assert isinstance(installation, (str | Path)) + assert isinstance(installation, (str, Path)) if isinstance(installation, str): installation_path = cspinstallations[installation.replace(".yaml", "")] @@ -168,27 +195,20 @@ def get_cspinstallationconfig(installation): elif isinstance(installation, Path): installation_path = installation - # Load and set expected index columns - with open(installation_path) as f: - config = yaml.safe_load(f) + with Path(installation_path).open() as f: + config = cast("dict[str, Any]", yaml.safe_load(f)) config["path"] = installation_path - ## Convert efficiency dict to xr.DataArray and convert units to deg -> rad, % -> p.u. + # Convert efficiency dict to xr.DataArray and convert units: deg -> rad, % -> p.u. da = pd.DataFrame(config["efficiency"]).set_index(["altitude", "azimuth"]) - - # Handle as xarray DataArray early - da will be 'return'-ed da = da.to_xarray()["value"] - # Solar altitude + azimuth expected in deg for better readibility - # calculations use solar position in rad - # Convert da to new coordinates and drop old + # Solar altitude + azimuth expected in deg for readability; calculations use rad. da = da.rename({"azimuth": "azimuth [deg]", "altitude": "altitude [deg]"}) - da = da.assign_coords( - { - "altitude": radians(da["altitude [deg]"]), - "azimuth": radians(da["azimuth [deg]"]), - } - ) + da = da.assign_coords({ + "altitude": radians(da["altitude [deg]"]), + "azimuth": radians(da["azimuth [deg]"]), + }) da = da.swap_dims({"altitude [deg]": "altitude", "azimuth [deg]": "azimuth"}) da = da.chunk("auto") @@ -198,33 +218,67 @@ def get_cspinstallationconfig(installation): config["efficiency"] = da - return config + return cast("CSPConfig", config) -def solarpanel_rated_capacity_per_unit(panel): - # unit is m^2 here +def solarpanel_rated_capacity_per_unit(panel: str | PathLike | PanelConfig) -> float: + """ + Return the rated capacity per unit of a solar panel configuration. - if isinstance(panel, (str | Path)): + Parameters + ---------- + panel : str or pathlib.Path or dict + Solar panel configuration or reference to one. + + Returns + ------- + float + Rated capacity per unit area or per panel, depending on the model. + + Raises + ------ + ValueError + If the panel model is unknown. + """ + if isinstance(panel, (str, Path)): panel = get_solarpanelconfig(panel) model = panel.get("model", "huld") if model == "huld": - return panel["efficiency"] - elif model == "bofinger": + return cast("float", panel["efficiency"]) + if model == "bofinger": # one unit in the capacity layout is interpreted as one panel of a - # capacity (A + 1000 * B + log(1000) * C) * 1000W/m^2 * (k / 1000) + # capacity (A + 1000 * B + log(1000) * C) * 1000 W/m^2 * (k / 1000) A, B, C = itemgetter("A", "B", "C")(panel) - return (A + B * 1000.0 + C * np.log(1000.0)) * 1e3 + return cast("float", (A + B * 1000.0 + C * np.log(1000.0)) * 1e3) + raise ValueError(f"Unknown panel model: {model}") + +def windturbine_rated_capacity_per_unit( + turbine: str | PathLike | TurbineConfig, +) -> float: + """ + Return the rated capacity of a wind turbine configuration. -def windturbine_rated_capacity_per_unit(turbine): - if isinstance(turbine, (str | Path)): + Parameters + ---------- + turbine : str or pathlib.Path or dict + Wind turbine configuration or reference to one. + + Returns + ------- + float + Rated turbine capacity. + """ + if isinstance(turbine, (str, Path)): turbine = get_windturbineconfig(turbine) return turbine["P"] -def windturbine_smooth(turbine, params=None): +def windturbine_smooth( + turbine: TurbineConfig, params: dict[str, float] | None | bool = None +) -> TurbineConfig: """ Smooth the powercurve in `turbine` with a gaussian kernel. @@ -252,27 +306,27 @@ def windturbine_smooth(turbine, params=None): if params is None or params is True: params = {} - eta = params.get("eta", 0.95) - Delta_v = params.get("Delta_v", 1.27) - sigma = params.get("sigma", 2.29) + params = cast("dict[str, float]", params) + eta: float = params.get("eta", 0.95) + Delta_v: float = params.get("Delta_v", 1.27) + sigma: float = params.get("sigma", 2.29) - def kernel(v_0): - # all velocities in m/s - return ( + def kernel(v_0: NDArray) -> NDArray: + result: NDArray = ( 1.0 / np.sqrt(2 * np.pi * sigma * sigma) * np.exp(-(v_0 - Delta_v) * (v_0 - Delta_v) / (2 * sigma * sigma)) ) + return result - def smooth(velocities, power): + def smooth(velocities: NDArray, power: NDArray) -> tuple[NDArray, NDArray]: # interpolate kernel and power curve to the same, regular velocity grid velocities_reg = np.linspace(-50.0, 50.0, 1001) power_reg = np.interp(velocities_reg, velocities, power) kernel_reg = kernel(velocities_reg) - # convolve power and kernel - # the downscaling is necessary because scipy expects the velocity - # increments to be 1., but here, they are 0.1 + # the 0.1 downscaling is necessary because scipy expects velocity + # increments of 1., but here they are 0.1 convolution = 0.1 * fftconvolve(power_reg, kernel_reg, mode="same") # sample down so power curve doesn't get too long @@ -283,7 +337,7 @@ def smooth(velocities, power): turbine = turbine.copy() turbine["V"], turbine["POW"] = smooth(turbine["V"], turbine["POW"]) - turbine["P"] = np.max(turbine["POW"]) + turbine["P"] = cast("float", float(np.max(turbine["POW"]))) if any(turbine["POW"][np.where(turbine["V"] == 0.0)] > 1e-2): logger.warning( @@ -297,15 +351,17 @@ def smooth(velocities, power): return turbine -def _max_v_is_zero_pow(turbine): - return np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0) +def _max_v_is_zero_pow(turbine: TurbineConfig) -> bool: + return cast( + "bool", bool(np.any(turbine["POW"][turbine["V"] == turbine["V"].max()] == 0)) + ) def _validate_turbine_config_dict( - turbine: dict, add_cutout_windspeed: bool + turbine: dict[str, Any], add_cutout_windspeed: bool ) -> TurbineConfig: """ - Checks the turbine config dict format and power curve. + Check the turbine config dict format and power curve. Parameters ---------- @@ -322,6 +378,11 @@ def _validate_turbine_config_dict( dict validated and potentially modified turbine config dict + Raises + ------ + ValueError + If the turbine config dict is missing required keys or has invalid values. + """ if not all(key in turbine for key in ("POW", "V", "P", "hub_height")): err_msg = ( @@ -330,11 +391,10 @@ def _validate_turbine_config_dict( ) raise ValueError(err_msg) - if not all(isinstance(turbine[p], (np.ndarray | list)) for p in ("POW", "V")): + if not all(isinstance(turbine[p], (np.ndarray, list)) for p in ("POW", "V")): err_msg = "turbine entries 'POW' and 'V' must be np.ndarray or list" raise ValueError(err_msg) - # convert lists from user provided turbine dicts to numpy arrays if any(isinstance(turbine[p], list) for p in ("POW", "V")): turbine["V"] = np.array(turbine["V"]) turbine["POW"] = np.array(turbine["POW"]) @@ -343,37 +403,37 @@ def _validate_turbine_config_dict( err_msg = "turbine wind speed and power arrays do not have equal length." raise ValueError(err_msg) + # Uses `>=` rather than `>` because many power curves have two entries for the + # same wind speed at the cut-in and cut-out speeds. if not np.all(np.diff(turbine["V"]) >= 0): - # This check is not strict as it uses `>=` instead of `>` and thus allows equal - # wind speeds in the array. However, many power curves have two entries for the - # same wind speed at the cut-in and cut-out speeds which would make them fail if - # using `>` only. err_msg = ( "wind speed 'V' in the turbine config dict is expected to be increasing, " f"but is currently not in ascending order:\n{turbine['V']}" ) raise ValueError(err_msg) - if add_cutout_windspeed is True and not _max_v_is_zero_pow(turbine): + if add_cutout_windspeed is True and not _max_v_is_zero_pow( + cast("TurbineConfig", turbine) + ): turbine["V"] = np.pad(turbine["V"], (0, 1), "maximum") turbine["POW"] = np.pad(turbine["POW"], (0, 1), "constant", constant_values=0) logger.info( - "adding a cut-out wind speed to the turbine power curve at " - f"V={turbine['V'][-1]} m/s." + "adding a cut-out wind speed to the turbine power curve at V=%s m/s.", + turbine["V"][-1], ) - if not _max_v_is_zero_pow(turbine): + if not _max_v_is_zero_pow(cast("TurbineConfig", turbine)): logger.warning( "The power curve does not have a cut-out wind speed, i.e. the power" " output corresponding to the\nhighest wind speed is not zero. You can" " either change the power curve manually or set\n" "'add_cutout_windspeed=True' in the Cutout.wind conversion method." ) - return turbine + return cast("TurbineConfig", turbine) def get_oedb_windturbineconfig( - search: int | str | None = None, **search_params + search: int | str | None = None, **search_params: Any ) -> TurbineConfig: """ Download a windturbine configuration from the OEDB database. @@ -406,6 +466,10 @@ def get_oedb_windturbineconfig( >>> get_oedb_windturbineconfig(name="E-53/800", manufacturer="Enercon") {'V': ..., 'POW': ..., ...} + Raises + ------ + RuntimeError + If no turbine or multiple turbines match the search. """ # Parse information of different allowed 'turbine' values if isinstance(search, int): @@ -428,9 +492,8 @@ def get_oedb_windturbineconfig( _oedb_turbines = df[df.has_power_curve] logger.info( - "Searching turbine power curve in OEDB database using " - + ", ".join(f"{k}='{v}'" for (k, v) in search_params.items()) - + "." + "Searching turbine power curve in OEDB database using %s.", + ", ".join(f"{k}='{v}'" for (k, v) in search_params.items()), ) # Working copy @@ -455,7 +518,7 @@ def get_oedb_windturbineconfig( if len(df) < 1: raise RuntimeError("No turbine found.") - elif len(df) > 1: + if len(df) > 1: raise RuntimeError( f"Provided information corresponds to {len(df)} turbines," " use `id` for an unambiguous search.\n" @@ -506,13 +569,13 @@ def get_oedb_windturbineconfig( name = "{manufacturer}_{name}".format(**turbineconf).translate(charmap) windturbines[name] = turbineconf - return turbineconf + return turbineconf # type: ignore[return-value] # Global caches _oedb_turbines = None windturbines = arrowdict({p.stem: p for p in WINDTURBINE_DIRECTORY.glob("*.yaml")}) solarpanels = arrowdict({p.stem: p for p in SOLARPANEL_DIRECTORY.glob("*.yaml")}) -cspinstallations = arrowdict( - {p.stem: p for p in CSPINSTALLATION_DIRECTORY.glob("*.yaml")} -) +cspinstallations = arrowdict({ + p.stem: p for p in CSPINSTALLATION_DIRECTORY.glob("*.yaml") +}) diff --git a/atlite/utils.py b/atlite/utils.py index 564a7921..8809eb89 100644 --- a/atlite/utils.py +++ b/atlite/utils.py @@ -1,14 +1,15 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -General utility functions for internal use. -""" +"""General utility functions for internal use.""" + +from __future__ import annotations import logging import re import textwrap from pathlib import Path +from typing import TYPE_CHECKING, Any, TypeAlias, cast import pandas as pd import xarray as xr @@ -16,12 +17,38 @@ from atlite.datasets import modules as datamodules from atlite.gis import maybe_swap_spatial_dims +if TYPE_CHECKING: + from collections.abc import Callable + + pass + logger = logging.getLogger(__name__) +PathLike: TypeAlias = str | Path +NDArray: TypeAlias = Any +DataArray: TypeAlias = xr.DataArray +Dataset: TypeAlias = xr.Dataset + def ensure_coords(index: pd.Index | xr.Coordinates) -> xr.Coordinates: """ - Convert an index or multiindex to coordinates + Convert an index or multiindex to coordinates. + + Parameters + ---------- + index : pd.Index or xr.Coordinates + The index or multiindex to convert. + + Returns + ------- + xr.Coordinates + The converted coordinates. + + Raises + ------ + ValueError + If the index is not a pandas index or xarray coordinates. + """ if isinstance(index, pd.MultiIndex): coords = xr.Coordinates.from_pandas_multiindex(index, index.name or "dim_0") @@ -36,9 +63,27 @@ def ensure_coords(index: pd.Index | xr.Coordinates) -> xr.Coordinates: return coords -def migrate_from_cutout_directory(old_cutout_dir, path): +def migrate_from_cutout_directory(old_cutout_dir: PathLike, path: PathLike) -> Dataset: """ - Convert an old style cutout directory to new style netcdf file. + Convert an old-style cutout directory to a new-style netCDF file. + + Parameters + ---------- + old_cutout_dir : str or Path + Path to the legacy cutout directory containing ``meta.nc``. + path : str or Path + Output path for the migrated ``.nc`` file. + + Returns + ------- + xr.Dataset + The migrated cutout data. + + Raises + ------ + MergeError + If automatic migration of multi-file datasets fails. + """ old_cutout_dir = Path(old_cutout_dir) with xr.open_dataset(old_cutout_dir / "meta.nc") as meta: @@ -78,7 +123,7 @@ def migrate_from_cutout_directory(old_cutout_dir, path): ) raise - data = maybe_swap_spatial_dims(data) + data = cast("Dataset", maybe_swap_spatial_dims(data)) module = data.attrs["module"] data.attrs["prepared_features"] = list(datamodules[module].features) for v in data: @@ -89,34 +134,48 @@ def migrate_from_cutout_directory(old_cutout_dir, path): path = Path(path).with_suffix(".nc") logger.info( - f"Writing cutout data to {path}. When done, load it again using" - f"\n\n\tatlite.Cutout('{path}')" + "Writing cutout data to %s. When done, load it again using" + "\n\n\tatlite.Cutout('%s')", + path, + path, ) data.to_netcdf(path) return data -def timeindex_from_slice(timeslice): +def timeindex_from_slice(timeslice: Any) -> pd.DatetimeIndex: + """ + Create an hourly DatetimeIndex from a slice with start/end month strings. + + Parameters + ---------- + timeslice : slice + Slice with start and end as month strings (e.g. ``"2013-01"``). + + Returns + ------- + pd.DatetimeIndex + Hourly index spanning the given months. + """ end = pd.Timestamp(timeslice.end) + pd.offsets.DateOffset(months=1) return pd.date_range(timeslice.start, end, freq="1h", closed="left") -class arrowdict(dict): - """ - A subclass of dict, which allows you to get items in the dict using the - attribute syntax! - """ +class arrowdict(dict[str, Any]): + """Dict subclass enabling attribute-style access to items.""" - def __getattr__(self, item): + def __getattr__(self, item: str) -> Any: + """Retrieve a dict value as an attribute, raising AttributeError on missing keys.""" # noqa: DOC201, DOC501 try: return self.__getitem__(item) except KeyError as e: - raise AttributeError(e.args[0]) + raise AttributeError(e.args[0]) from e _re_pattern = re.compile("[a-zA-Z_][a-zA-Z0-9_]*") - def __dir__(self): - dict_keys = [] + def __dir__(self) -> list[str]: + """List keys that are valid Python identifiers for tab-completion.""" # noqa: DOC201 + dict_keys: list[str] = [] for k in self.keys(): if isinstance(k, str): m = self._re_pattern.match(k) @@ -127,29 +186,41 @@ def __dir__(self): class CachedAttribute: """ - Computes attribute value and caches it in the instance. + Descriptor that computes an attribute value once and caches it on the instance. - From the Python Cookbook (Denis Otkidach) This decorator allows you - to create a property which can be computed once and accessed many - times. Sort of like memoization. + Based on the Python Cookbook recipe by Denis Otkidach. """ - # For python 3.8 >= use functoolts.cached_property instead. + method: Callable[[Any], Any] + name: str + __doc__: str | None - def __init__(self, method, name=None, doc=None): - # record the unbound-method and the name + def __init__( + self, + method: Callable[[Any], Any], + name: str | None = None, + doc: str | None = None, + ) -> None: + """ + Initialize the cached attribute descriptor. + + Parameters + ---------- + method : callable + Method whose return value will be cached. + name : str, optional + Attribute name for caching. Defaults to ``method.__name__``. + doc : str, optional + Docstring override. Defaults to ``method.__doc__``. + """ self.method = method self.name = name or method.__name__ self.__doc__ = doc or method.__doc__ - def __get__(self, inst, cls): + def __get__(self, inst: Any, cls: type[Any] | None) -> Any: + """Compute on first access, cache the result, and return it.""" # noqa: DOC201 if inst is None: - # instance attribute accessed on class, return self - # You get here if you write `Foo.bar` return self - # compute, cache and return the instance's attribute value result = self.method(inst) - # setattr redefines the instance's attribute so this doesn't get called - # again setattr(inst, self.name, result) return result diff --git a/atlite/wind.py b/atlite/wind.py index 92dd1f19..a7b28645 100644 --- a/atlite/wind.py +++ b/atlite/wind.py @@ -1,24 +1,22 @@ # SPDX-FileCopyrightText: Contributors to atlite # # SPDX-License-Identifier: MIT -""" -Functions for use in conjunction with wind data generation. -""" +"""Functions for use in conjunction with wind data generation.""" from __future__ import annotations import logging import re -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal, cast import numpy as np -import xarray as xr -logger = logging.getLogger(__name__) +if TYPE_CHECKING: + import xarray as xr + from atlite._types import NDArray -if TYPE_CHECKING: - from typing import Literal +logger = logging.getLogger(__name__) def extrapolate_wind_speed( @@ -59,8 +57,12 @@ def extrapolate_wind_speed( Raises ------ + AssertionError + If no wind speed variables are found in the dataset. RuntimeError - If the cutout is missing the data for the chosen `method` + If the cutout is missing the data for the chosen `method`. + ValueError + If ``method`` is not ``'logarithmic'`` or ``'power'``. References ---------- @@ -72,42 +74,42 @@ def extrapolate_wind_speed( Wind Resource Assessment: A Comparison against Tall Towers' https://doi.org/10.3390/en14144169 . """ - # Fast lane - to_name = f"wnd{int(to_height):0d}m" + to_name: str = f"wnd{int(to_height):0d}m" if to_name in ds: return ds[to_name] if from_height is None: - # Determine closest height to to_name - heights = np.asarray([int(s[3:-1]) for s in ds if re.match(r"wnd\d+m", s)]) + heights: NDArray = np.asarray([ + int(str(s)[3:-1]) for s in ds if re.match(r"wnd\d+m", str(s)) + ]) if len(heights) == 0: raise AssertionError("Wind speed is not in dataset") - from_height = heights[np.argmin(np.abs(heights - to_height))] + from_height = int(heights[np.argmin(np.abs(heights - to_height))]) - from_name = f"wnd{int(from_height):0d}m" + from_name: str = f"wnd{int(from_height):0d}m" if method == "logarithmic": try: - roughness = ds["roughness"] + roughness: xr.DataArray = ds["roughness"] except KeyError: raise RuntimeError( "The logarithmic interpolation method requires surface roughness (roughness);\n" "make sure you choose a compatible dataset like ERA5" - ) - wnd_spd = ds[from_name] * ( + ) from None + wnd_spd: xr.DataArray = ds[from_name] * ( np.log(to_height / roughness) / np.log(from_height / roughness) ) - method_desc = "logarithmic method with roughness" + method_desc: str = "logarithmic method with roughness" elif method == "power": try: - wnd_shear_exp = ds["wnd_shear_exp"] + wnd_shear_exp: xr.DataArray = ds["wnd_shear_exp"] except KeyError: raise RuntimeError( "The power law interpolation method requires a wind shear exponent (wnd_shear_exp);\n" "make sure you choose a compatible dataset like ERA5 and update your cutout" - ) + ) from None wnd_spd = ds[from_name] * (to_height / from_height) ** wnd_shear_exp method_desc = "power method with wind shear exponent" else: @@ -115,14 +117,12 @@ def extrapolate_wind_speed( f"Interpolation method must be 'logarithmic' or 'power', but is: {method}" ) - wnd_spd.attrs.update( - { - "long name": ( - f"extrapolated {to_height} m wind speed using {method_desc} " - f" and {from_height} m wind speed" - ), - "units": "m s**-1", - } - ) - - return wnd_spd.rename(to_name) + wnd_spd.attrs.update({ + "long name": ( + f"extrapolated {to_height} m wind speed using {method_desc} " + f" and {from_height} m wind speed" + ), + "units": "m s**-1", + }) + + return cast("xr.DataArray", wnd_spd.rename(to_name)) diff --git a/doc/chart.py b/doc/chart.py index e03a0e0b..4d08befe 100755 --- a/doc/chart.py +++ b/doc/chart.py @@ -7,6 +7,8 @@ This is a temporary script file. """ +from typing import Any + import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(12, 5)) @@ -37,16 +39,20 @@ processedstr = "\n" + "\n\n\n".join([" ◦ " + s for s in processeddata]) + "\n" # defaults for boxes and arrows -kwargs = dict(verticalalignment="center", fontsize=14, color="#545454") -arrowkwargs = dict( - head_width=0.2, - width=0.13, - head_length=0.05, - edgecolor="white", - length_includes_head=True, - color="lightgray", - alpha=1, -) +kwargs: dict[str, Any] = { + "verticalalignment": "center", + "fontsize": 14, + "color": "#545454", +} +arrowkwargs = { + "head_width": 0.2, + "width": 0.13, + "head_length": 0.05, + "edgecolor": "white", + "length_includes_head": True, + "color": "lightgray", + "alpha": 1, +} y = 0.5 # First arrow @@ -61,7 +67,12 @@ y, climatestr, **kwargs, - bbox=dict(facecolor="indianred", alpha=0.5, edgecolor="None", boxstyle="round"), + bbox={ + "facecolor": "indianred", + "alpha": 0.5, + "edgecolor": "None", + "boxstyle": "round", + }, ) # Second arrow @@ -74,7 +85,12 @@ y, processedstr, **kwargs, - bbox=dict(facecolor="olivedrab", alpha=0.5, edgecolor="None", boxstyle="round"), + bbox={ + "facecolor": "olivedrab", + "alpha": 0.5, + "edgecolor": "None", + "boxstyle": "round", + }, ) diff --git a/doc/conf.py b/doc/conf.py index 64119206..15e6e9d2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +"""Sphinx configuration for atlite documentation.""" + # # atlite documentation build configuration file, created by # sphinx-quickstart on Tue Jan 5 10:04:42 2016. @@ -15,7 +17,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. - from importlib.metadata import version as get_version # If extensions (or modules to document with autodoc) are in another directory, @@ -33,7 +34,7 @@ # ones. extensions = [ "sphinx.ext.autodoc", - #'sphinx.ext.autosummary', + # 'sphinx.ext.autosummary', "sphinx.ext.intersphinx", "sphinx.ext.todo", "sphinx.ext.mathjax", @@ -41,8 +42,8 @@ "nbsphinx", "nbsphinx_link", # 'sphinx.ext.pngmath', - #'sphinxcontrib.tikz', - #'rinoh.frontend.sphinx', + # 'sphinxcontrib.tikz', + # 'rinoh.frontend.sphinx', "sphinx.ext.imgconverter", # for SVG conversion ] @@ -253,15 +254,15 @@ # -- Options for LaTeX output --------------------------------------------- -latex_elements = { +latex_elements: dict[str, str] = { # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', + # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', + # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. - #'preamble': '', + # 'preamble': '', # Latex figure (float) alignment - #'figure_align': 'htbp', + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples diff --git a/examples/building_stock_weather_aggregation.ipynb b/examples/building_stock_weather_aggregation.ipynb index fafc2373..727847c5 100644 --- a/examples/building_stock_weather_aggregation.ipynb +++ b/examples/building_stock_weather_aggregation.ipynb @@ -307,7 +307,8 @@ "# Clip the raster data and reproject the result back into EPSG:4326 to match the cutout,\n", "# also remove some unnecessary dimensions via `squeeze()`\n", "layout = (\n", - " population_density.rio.clip(\n", + " population_density.rio\n", + " .clip(\n", " finland_3035.geometry, from_disk=True\n", " ) # Clip the population density raster data with the reprojected Finland shape.\n", " .rio.reproject( # Reproject and resample the population density raster to match the cutout.\n", @@ -445,7 +446,8 @@ "dirs = {\"north\": 0.0, \"east\": 90.0, \"south\": 180.0, \"west\": 270.0}\n", "for name, lout in layouts.items():\n", " irr_total[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az}, layout=lout.fillna(0.0)\n", " )\n", " .squeeze()\n", @@ -453,7 +455,8 @@ " for d, az in dirs.items()\n", " }\n", " irr_direct[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az},\n", " layout=lout.fillna(0.0),\n", " irradiation=\"direct\",\n", @@ -463,7 +466,8 @@ " for d, az in dirs.items()\n", " }\n", " irr_diffuse[name] = {\n", - " d: cutout.irradiation(\n", + " d: cutout\n", + " .irradiation(\n", " orientation={\"slope\": 90.0, \"azimuth\": az},\n", " layout=lout.fillna(0.0),\n", " irradiation=\"diffuse\",\n", diff --git a/examples/historic-comparison-germany.ipynb b/examples/historic-comparison-germany.ipynb index dda326ef..64dbe871 100644 --- a/examples/historic-comparison-germany.ipynb +++ b/examples/historic-comparison-germany.ipynb @@ -67,17 +67,17 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import zipfile\n", + "from pathlib import Path\n", "\n", "import requests\n", "\n", "\n", "def download_file(url, local_filename):\n", " # variant of http://stackoverflow.com/a/16696317\n", - " if not os.path.exists(local_filename):\n", + " if not Path(local_filename).exists():\n", " r = requests.get(url, stream=True)\n", - " with open(local_filename, \"wb\") as f:\n", + " with Path(local_filename).open(\"wb\") as f:\n", " for chunk in r.iter_content(chunk_size=1024):\n", " if chunk:\n", " f.write(chunk)\n", @@ -121,7 +121,7 @@ "opsd.index = opsd.index.tz_convert(None)\n", "\n", "# We are only interested in the 2012 data\n", - "opsd = opsd[(\"2011\" < opsd.index) & (opsd.index < \"2013\")]" + "opsd = opsd[(opsd.index > \"2011\") & (opsd.index < \"2013\")]" ] }, { @@ -261,31 +261,33 @@ "\n", " Parameters\n", " ----------\n", - " typ : str\n", - " Type of energy source, e.g. \"Solarstrom\" (PV), \"Windenergie\" (wind).\n", - " cap_range : (optional) list-like\n", - " Two entries, limiting the lower and upper range of capacities (in kW)\n", - " to include. Left-inclusive, right-exclusive.\n", - " until : str\n", - " String representation of a datetime object understood by pandas.to_datetime()\n", - " for limiting to installations existing until this datetime.\n", + " typ : str\n", + " Type of energy source, e.g. \"Solarstrom\" (PV), \"Windenergie\" (wind).\n", + " cap_range : (optional) list-like\n", + " Two entries, limiting the lower and upper range of capacities (in kW)\n", + " to include. Left-inclusive, right-exclusive.\n", + " until : str\n", + " String representation of a datetime object understood by pandas.to_datetime()\n", + " for limiting to installations existing until this datetime.\n", "\n", - " \"\"\"\n", + " Returns\n", + " -------\n", + " pandas.DataFrame\n", + " Filtered capacities.\n", "\n", + " \"\"\"\n", " # Load locations of installed capacities and remove incomplete entries\n", - " cols = OrderedDict(\n", - " (\n", - " (\"installation_date\", 0),\n", - " (\"plz\", 2),\n", - " (\"city\", 3),\n", - " (\"type\", 6),\n", - " (\"capacity\", 8),\n", - " (\"level\", 9),\n", - " (\"lat\", 19),\n", - " (\"lon\", 20),\n", - " (\"validation\", 22),\n", - " )\n", - " )\n", + " cols = OrderedDict((\n", + " (\"installation_date\", 0),\n", + " (\"plz\", 2),\n", + " (\"city\", 3),\n", + " (\"type\", 6),\n", + " (\"capacity\", 8),\n", + " (\"level\", 9),\n", + " (\"lat\", 19),\n", + " (\"lon\", 20),\n", + " (\"validation\", 22),\n", + " ))\n", " database = pd.read_csv(\n", " \"eeg_anlagenregister_2015.08.utf8.csv\",\n", " sep=\";\",\n", @@ -455,9 +457,10 @@ ], "source": [ "compare = (\n", - " pd.DataFrame(\n", - " dict(atlite=pv.squeeze().to_series(), opsd=opsd[\"DE_solar_generation_actual\"])\n", - " )\n", + " pd.DataFrame({\n", + " \"atlite\": pv.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " })\n", " / 1e3\n", ") # in GW\n", "compare.resample(\"1W\").mean().plot(figsize=(8, 5))\n", @@ -492,11 +495,10 @@ "source": [ "pv_opt = cutout.pv(panel=\"CSi\", orientation=\"latitude_optimal\", layout=solar_layout)\n", "compare_opt = (\n", - " pd.DataFrame(\n", - " dict(\n", - " atlite=pv_opt.squeeze().to_series(), opsd=opsd[\"DE_solar_generation_actual\"]\n", - " )\n", - " )\n", + " pd.DataFrame({\n", + " \"atlite\": pv_opt.squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_solar_generation_actual\"],\n", + " })\n", " / 1e3\n", ") # in GW\n", "compare_opt.resample(\"1W\").mean().plot(figsize=(8, 5))\n", @@ -685,14 +687,14 @@ "outputs": [], "source": [ "turbine_categories = [\n", - " dict(name=\"Vestas_V25_200kW\", up=400.0),\n", - " dict(name=\"Vestas_V47_660kW\", up=700.0),\n", - " dict(name=\"Bonus_B1000_1000kW\", up=1100.0),\n", - " dict(name=\"Suzlon_S82_1.5_MW\", up=1600.0),\n", - " dict(name=\"Vestas_V66_1750kW\", up=1900.0),\n", - " dict(name=\"Vestas_V80_2MW_gridstreamer\", up=2200.0),\n", - " dict(name=\"Siemens_SWT_2300kW\", up=2500.0),\n", - " dict(name=\"Vestas_V90_3MW\", up=50000.0),\n", + " {\"name\": \"Vestas_V25_200kW\", \"up\": 400.0},\n", + " {\"name\": \"Vestas_V47_660kW\", \"up\": 700.0},\n", + " {\"name\": \"Bonus_B1000_1000kW\", \"up\": 1100.0},\n", + " {\"name\": \"Suzlon_S82_1.5_MW\", \"up\": 1600.0},\n", + " {\"name\": \"Vestas_V66_1750kW\", \"up\": 1900.0},\n", + " {\"name\": \"Vestas_V80_2MW_gridstreamer\", \"up\": 2200.0},\n", + " {\"name\": \"Siemens_SWT_2300kW\", \"up\": 2500.0},\n", + " {\"name\": \"Vestas_V90_3MW\", \"up\": 50000.0},\n", "]" ] }, @@ -1235,13 +1237,11 @@ }, "outputs": [], "source": [ - "compare = pd.DataFrame(\n", - " {\n", - " \"atlite\": wind[\"total\"].squeeze().to_series(),\n", - " \"< 1600 kW\": wind[\"< 1600.0 kW\"].squeeze().to_series(),\n", - " \"opsd\": opsd[\"DE_wind_generation_actual\"],\n", - " }\n", - ")\n", + "compare = pd.DataFrame({\n", + " \"atlite\": wind[\"total\"].squeeze().to_series(),\n", + " \"< 1600 kW\": wind[\"< 1600.0 kW\"].squeeze().to_series(),\n", + " \"opsd\": opsd[\"DE_wind_generation_actual\"],\n", + "})\n", "\n", "compare = compare / 1e3 # in GW" ] @@ -1399,7 +1399,8 @@ " filter(lambda r: r.attributes[\"iso_3166_2\"].startswith(\"DE\"), shp.records())\n", ")\n", "laender = (\n", - " gpd.GeoDataFrame([{**r.attributes, \"geometry\": r.geometry} for r in de_records])\n", + " gpd\n", + " .GeoDataFrame([{**r.attributes, \"geometry\": r.geometry} for r in de_records])\n", " .rename(columns={\"iso_3166_2\": \"state\"})\n", " .set_index(\"state\")\n", " .set_crs(4236)\n", diff --git a/examples/plotting_with_atlite.ipynb b/examples/plotting_with_atlite.ipynb index eba004ac..51843928 100644 --- a/examples/plotting_with_atlite.ipynb +++ b/examples/plotting_with_atlite.ipynb @@ -263,14 +263,14 @@ "gs = GridSpec(3, 3, figure=fig)\n", "\n", "ax = fig.add_subplot(gs[:, 0:2], projection=projection)\n", - "plot_grid_dict = dict(\n", - " alpha=0.1,\n", - " edgecolor=\"k\",\n", - " zorder=4,\n", - " aspect=\"equal\",\n", - " facecolor=\"None\",\n", - " transform=plate(),\n", - ")\n", + "plot_grid_dict = {\n", + " \"alpha\": 0.1,\n", + " \"edgecolor\": \"k\",\n", + " \"zorder\": 4,\n", + " \"aspect\": \"equal\",\n", + " \"facecolor\": \"None\",\n", + " \"transform\": plate(),\n", + "}\n", "UkIr.plot(ax=ax, zorder=1, transform=plate())\n", "cells.plot(ax=ax, **plot_grid_dict)\n", "country_bound.plot(ax=ax, edgecolor=\"orange\", facecolor=\"None\", transform=plate())\n", @@ -401,7 +401,8 @@ "cells_generation = sites.merge(cells, how=\"inner\").rename(pd.Series(sites.index))\n", "\n", "layout = (\n", - " xr.DataArray(cells_generation.set_index([\"y\", \"x\"]).capacity.unstack())\n", + " xr\n", + " .DataArray(cells_generation.set_index([\"y\", \"x\"]).capacity.unstack())\n", " .reindex_like(cap_factors)\n", " .rename(\"Installed Capacity [MW]\")\n", ")\n", @@ -519,7 +520,8 @@ " spine.set_edgecolor(\"white\")\n", "\n", "power_generation = (\n", - " cutout.wind(\"Vestas_V112_3MW\", layout=layout.fillna(0), shapes=UkIr)\n", + " cutout\n", + " .wind(\"Vestas_V112_3MW\", layout=layout.fillna(0), shapes=UkIr)\n", " .to_pandas()\n", " .rename_axis(index=\"\", columns=\"shapes\")\n", ")\n", diff --git a/examples/solarpv_tracking_options.ipynb b/examples/solarpv_tracking_options.ipynb index e31d345e..50a49205 100644 --- a/examples/solarpv_tracking_options.ipynb +++ b/examples/solarpv_tracking_options.ipynb @@ -408,7 +408,9 @@ "source": [ "day_profiles = [ds.loc[day, point].squeeze() for ds in data]\n", "\n", - "df = pd.DataFrame({k: v.to_series() for k, v in zip(labels, day_profiles)})\n", + "df = pd.DataFrame({\n", + " k: v.to_series() for k, v in zip(labels, day_profiles, strict=False)\n", + "})\n", "df.plot(figsize=(10, 5))\n", "plt.title(\"PV Tracking: Portugal @(-9°, 40°), May 1, 2019\")" ] @@ -452,7 +454,7 @@ ], "source": [ "average = [ds.mean(\"dim_0\").mean().item() for ds in data]\n", - "df = pd.Series({k: v for k, v in zip(labels, average)})\n", + "df = pd.Series(dict(zip(labels, average, strict=False)))\n", "df.mul(100).plot.barh(figsize=(10, 5), zorder=2)\n", "plt.grid(axis=\"x\", zorder=1)\n", "plt.title(\"PV Tracking: Average Capacity Factor per Cell [%]\")" diff --git a/examples/working-with-csp.ipynb b/examples/working-with-csp.ipynb index ca9eaf72..301af585 100644 --- a/examples/working-with-csp.ipynb +++ b/examples/working-with-csp.ipynb @@ -27,6 +27,8 @@ "metadata": {}, "outputs": [], "source": [ + "from pathlib import Path\n", + "\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -521,7 +523,7 @@ "}\n", "\n", "layout = xr.zeros_like(cf)\n", - "layout.loc[dict(x=nearest_location[\"x\"], y=nearest_location[\"y\"])] = installed_power" + "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power" ] }, { @@ -586,18 +588,14 @@ ], "source": [ "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout.csp(\n", - " installation=\"lossless_installation\",\n", - " technology=\"solar tower\",\n", - " layout=layout,\n", - " ).rename(\"lossless_installation\"),\n", - " cutout.csp(installation=\"SAM_solar_tower\", layout=layout).rename(\n", - " \"SAM_solar_tower\"\n", - " ),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout.csp(\n", + " installation=\"lossless_installation\",\n", + " technology=\"solar tower\",\n", + " layout=layout,\n", + " ).rename(\"lossless_installation\"),\n", + " cutout.csp(installation=\"SAM_solar_tower\", layout=layout).rename(\"SAM_solar_tower\"),\n", + "])\n", "\n", "# Load reference time-series from file\n", "df = pd.read_csv(\"../profiles_and_efficiencies_from_sam/ST-salt_time-series_spain.csv\")\n", @@ -764,21 +762,19 @@ "\n", "# installed power = 950 W/m^2 * area = 1205.0 MW\n", "installed_power = config[\"r_irradiance\"] * area / 1.0e6\n", - "layout.loc[dict(x=nearest_location[\"x\"], y=nearest_location[\"y\"])] = installed_power\n", + "layout.loc[{\"x\": nearest_location[\"x\"], \"y\": nearest_location[\"y\"]}] = installed_power\n", "\n", "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout.csp(\n", - " installation=\"lossless_installation\",\n", - " technology=\"parabolic trough\",\n", - " layout=layout,\n", - " ).rename(\"lossless_installation\"),\n", - " cutout.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", - " \"SAM_parabolic_trough\"\n", - " ),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout.csp(\n", + " installation=\"lossless_installation\",\n", + " technology=\"parabolic trough\",\n", + " layout=layout,\n", + " ).rename(\"lossless_installation\"),\n", + " cutout.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", + " \"SAM_parabolic_trough\"\n", + " ),\n", + "])\n", "\n", "# Load reference time-series from file\n", "df = pd.read_csv(\n", @@ -951,17 +947,15 @@ ], "source": [ "# Calculate time-series for layout with both installation configurations\n", - "time_series = xr.merge(\n", - " [\n", - " cutout_sarah.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", - " \"SARAH\"\n", - " ),\n", - " cutout.csp(\n", - " installation=\"SAM_parabolic_trough\",\n", - " layout=layout,\n", - " ).rename(\"ERA5\"),\n", - " ]\n", - ")\n", + "time_series = xr.merge([\n", + " cutout_sarah.csp(installation=\"SAM_parabolic_trough\", layout=layout).rename(\n", + " \"SARAH\"\n", + " ),\n", + " cutout.csp(\n", + " installation=\"SAM_parabolic_trough\",\n", + " layout=layout,\n", + " ).rename(\"ERA5\"),\n", + "])\n", "\n", "# Load reference NREL SAM time-series from file\n", "df = pd.read_csv(\n", @@ -1187,14 +1181,16 @@ "# Interpolate values to a finer grid and fill missing values by extrapolation\n", "# Order is relevant: Start with Azimuth (where we have sufficient values) and then continue with altitude\n", "da = (\n", - " da.interpolate_na(\"azimuth\")\n", + " da\n", + " .interpolate_na(\"azimuth\")\n", " .interpolate_na(\"altitude\")\n", " .interpolate_na(\"azimuth\", fill_value=\"extrapolate\")\n", ")\n", "\n", "# Use rolling horizon to smooth values, average over 3x3 adjacent values per pixel\n", "da = (\n", - " da.rolling(azimuth=3, altitude=3)\n", + " da\n", + " .rolling(azimuth=3, altitude=3)\n", " .mean()\n", " .interpolate_na(\"altitude\", fill_value=\"extrapolate\")\n", " .interpolate_na(\"azimuth\", fill_value=\"extrapolate\")\n", @@ -1286,7 +1282,7 @@ " \"efficiency\": df,\n", "}\n", "\n", - "with open(\"SAM_solar_tower.yaml\", \"w\") as f:\n", + "with Path(\"SAM_solar_tower.yaml\").open(\"w\") as f:\n", " yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)" ] } diff --git a/pyproject.toml b/pyproject.toml index d7d3c952..f42c77b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ Documentation = "https://atlite.readthedocs.io/en/latest/" [project.optional-dependencies] -dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff"] +dev = ["pre-commit", "pytest", "pytest-cov", "matplotlib", "ruff", "mypy", "types-PyYAML", "types-requests"] docs = [ "numpydoc==1.8.0", @@ -81,42 +81,49 @@ include = ["atlite"] [tool.ruff] extend-include = ['*.ipynb'] +preview = true [tool.ruff.lint] select = [ - 'F', # pyflakes - 'E', # pycodestyle: Error - 'W', # pycodestyle: Warning - 'I', # isort - 'D', # pydocstyle - 'UP', # pyupgrade - 'TID', # flake8-tidy-imports - # 'NPY', # numpy - 'RUF013', # ruff + 'F', # pyflakes + 'E', # pycodestyle: Error + 'W', # pycodestyle: Warning + 'I', # isort + 'D', # pydocstyle + 'UP', # pyupgrade + 'TID', # flake8-tidy-imports + 'B', # flake8-bugbear + 'SIM', # flake8-simplify + 'RET', # flake8-return + 'C4', # flake8-comprehensions + 'TC', # flake8-type-checking + 'NPY', # numpy + 'G', # flake8-logging-format + 'PTH', # flake8-use-pathlib + 'DOC', # pydoclint: docstring-signature alignment + 'RUF013', # ruff: implicit-optional + 'RUF100', # ruff: unused-noqa ] ignore = [ - 'E501', # line too long - 'E741', # ambiguous variable names - 'D105', # Missing docstring in magic method - 'D212', # Multi-line docstring summary should start at the second line - 'D200', # One-line docstring should fit on one line with quotes - 'D401', # First line should be in imperative mood - 'D404', # First word of the docstring should not be "This - 'D413', # Missing blank line after last section - - # # pydocstyle ignores, which could be enabled in future when existing - # # issues are fixed - 'D100', # Missing docstring in public module - 'D101', # Missing docstring in public class - 'D102', # Missing docstring in public method - 'D103', # Missing docstring in public function - 'D107', # Missing docstring in __init__ - 'D202', # No blank lines allowed after function docstring - 'D203', # 1 blank line required before class docstring - 'D205', # 1 blank line required between summary line and description - 'D400', # First line should end with a period - 'D415', # First line should end with a period, question mark, or exclamation point - 'D417', # Missing argument descriptions in the docstring - + 'E501', # line too long + 'E741', # ambiguous variable names + 'PTH118', # os.path.join -> Path (keep os.path.join for template strings) ] + +[tool.ruff.lint.per-file-ignores] +"test/**" = ["D101", "D102", "D103", "D205"] +"examples/**" = ["D103"] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +[tool.mypy] +python_version = "3.11" +warn_return_any = true +warn_unused_configs = true +ignore_missing_imports = true +warn_unused_ignores = true +disallow_incomplete_defs = true +check_untyped_defs = true +exclude = [".venv/", "build/"] diff --git a/test/conftest.py b/test/conftest.py index 9dfb99a6..77235d36 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -2,12 +2,14 @@ # # SPDX-License-Identifier: MIT +"""Shared pytest fixtures for atlite tests.""" + import os from datetime import date from pathlib import Path import pytest -from dateutil.relativedelta import relativedelta +from dateutil.relativedelta import relativedelta # type: ignore[import-untyped] from atlite import Cutout @@ -35,8 +37,7 @@ def cutouts_path(tmp_path_factory, pytestconfig): path = Path(custom_path) path.mkdir(parents=True, exist_ok=True) return path - else: - return tmp_path_factory.mktemp("atlite_cutouts") + return tmp_path_factory.mktemp("atlite_cutouts") def _prepare_era5_cutout(path, prepare_kwargs=None, **kwargs): @@ -110,18 +111,16 @@ def cutout_era5_weird_resolution(cutouts_path): @pytest.fixture(scope="session") def cutout_era5_reduced(cutouts_path): tmp_path = cutouts_path / "cutout_era5_reduced.nc" - cutout = Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) - return cutout + return Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) @pytest.fixture(scope="session") def cutout_era5_overwrite(cutouts_path, cutout_era5_reduced): tmp_path = cutouts_path / "cutout_era5_overwrite.nc" - cutout = Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) + return Cutout(path=tmp_path, module="era5", bounds=BOUNDS, time=TIME) # cutout.data = cutout.data.drop_vars("influx_direct") # cutout.prepare("influx", overwrite=True) # TODO Needs to be fixed - return cutout @pytest.fixture(scope="session") diff --git a/test/test_aggregate_time.py b/test/test_aggregate_time.py index 86f0f469..97f17b64 100644 --- a/test/test_aggregate_time.py +++ b/test/test_aggregate_time.py @@ -2,6 +2,10 @@ # # SPDX-License-Identifier: MIT +"""Tests for time aggregation functionality.""" + +from typing import Any + import numpy as np import pandas as pd import pytest @@ -10,6 +14,12 @@ from atlite.convert import convert_and_aggregate +def _call(*args: Any, **kwargs: Any) -> xr.DataArray | xr.Dataset: + result = convert_and_aggregate(*args, **kwargs) + assert not isinstance(result, tuple) + return result + + class MockCutout: def __init__(self, data): self.data = data @@ -23,44 +33,42 @@ def identity_convert(ds, **kwargs): @pytest.fixture def cutout(): - np.random.seed(42) + rng = np.random.default_rng(42) times = xr.date_range("2020-01-01", periods=24, freq="h") - data = xr.Dataset( - { - "var": xr.DataArray( - np.random.rand(24, 3, 4), - dims=["time", "y", "x"], - coords={ - "time": times, - "y": [50.0, 51.0, 52.0], - "x": [5.0, 6.0, 7.0, 8.0], - }, - ) - } - ) + data = xr.Dataset({ + "var": xr.DataArray( + rng.random((24, 3, 4)), + dims=["time", "y", "x"], + coords={ + "time": times, + "y": [50.0, 51.0, 52.0], + "x": [5.0, 6.0, 7.0, 8.0], + }, + ) + }) return MockCutout(data) class TestAggregateTimeNoSpatial: def test_aggregate_time_none_returns_timeseries(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time=None) + result = _call(cutout, identity_convert, aggregate_time=None) assert "time" in result.dims def test_aggregate_time_mean(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time="mean") + result = _call(cutout, identity_convert, aggregate_time="mean") assert "time" not in result.dims expected = cutout.data["var"].mean("time") np.testing.assert_allclose(result.values, expected.values) def test_aggregate_time_sum(self, cutout): - result = convert_and_aggregate(cutout, identity_convert, aggregate_time="sum") + result = _call(cutout, identity_convert, aggregate_time="sum") assert "time" not in result.dims expected = cutout.data["var"].sum("time") np.testing.assert_allclose(result.values, expected.values) def test_legacy_default_no_spatial_sums_over_time(self, cutout): with pytest.warns(FutureWarning, match="aggregate_time='legacy'"): - result = convert_and_aggregate(cutout, identity_convert) + result = _call(cutout, identity_convert) expected = cutout.data["var"].sum("time") assert "time" not in result.dims xr.testing.assert_identical(result, expected) @@ -77,14 +85,12 @@ def layout(cutout): @pytest.fixture def result_ts(cutout, layout): - return convert_and_aggregate( - cutout, identity_convert, layout=layout, aggregate_time=None - ) + return _call(cutout, identity_convert, layout=layout, aggregate_time=None) class TestAggregateTimeWithSpatial: def test_aggregate_time_mean_with_layout(self, cutout, layout, result_ts): - result_mean = convert_and_aggregate( + result_mean = _call( cutout, identity_convert, layout=layout, aggregate_time="mean" ) assert "time" in result_ts.dims @@ -92,7 +98,7 @@ def test_aggregate_time_mean_with_layout(self, cutout, layout, result_ts): np.testing.assert_allclose(result_mean.values, result_ts.mean("time").values) def test_aggregate_time_sum_with_layout(self, cutout, layout, result_ts): - result_sum = convert_and_aggregate( + result_sum = _call( cutout, identity_convert, layout=layout, aggregate_time="sum" ) assert "time" not in result_sum.dims @@ -100,7 +106,7 @@ def test_aggregate_time_sum_with_layout(self, cutout, layout, result_ts): def test_legacy_default_with_layout_returns_timeseries(self, cutout, layout): with pytest.warns(FutureWarning, match="aggregate_time='legacy'"): - result = convert_and_aggregate(cutout, identity_convert, layout=layout) + result = _call(cutout, identity_convert, layout=layout) assert "time" in result.dims def test_aggregate_time_with_per_unit(self, cutout): @@ -109,7 +115,7 @@ def test_aggregate_time_with_per_unit(self, cutout): dims=["y", "x"], coords={"y": cutout.data.y, "x": cutout.data.x}, ) - result_pu = convert_and_aggregate( + result_pu = _call( cutout, identity_convert, layout=layout, @@ -118,7 +124,7 @@ def test_aggregate_time_with_per_unit(self, cutout): ) assert "time" not in result_pu.dims - result_pu_ts = convert_and_aggregate( + result_pu_ts = _call( cutout, identity_convert, layout=layout, @@ -131,23 +137,19 @@ def test_aggregate_time_with_per_unit(self, cutout): class TestDeprecatedParams: def test_capacity_factor_warns(self, cutout): with pytest.warns(FutureWarning, match="capacity_factor is deprecated"): - result = convert_and_aggregate( - cutout, identity_convert, capacity_factor=True - ) + result = _call(cutout, identity_convert, capacity_factor=True) assert "time" not in result.dims def test_capacity_factor_timeseries_warns(self, cutout): with pytest.warns( FutureWarning, match="capacity_factor_timeseries is deprecated" ): - result = convert_and_aggregate( - cutout, identity_convert, capacity_factor_timeseries=True - ) + result = _call(cutout, identity_convert, capacity_factor_timeseries=True) assert "time" in result.dims def test_capacity_factor_with_aggregate_time_raises(self, cutout): with pytest.raises(ValueError, match="Cannot use"): - convert_and_aggregate( + _call( cutout, identity_convert, capacity_factor=True, @@ -158,12 +160,12 @@ def test_capacity_factor_with_aggregate_time_raises(self, cutout): class TestInvalidArgs: def test_invalid_aggregate_time_value(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time="invalid") + _call(cutout, identity_convert, aggregate_time="invalid") def test_aggregate_time_false_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=False) + _call(cutout, identity_convert, aggregate_time=False) def test_aggregate_time_true_raises(self, cutout): with pytest.raises(ValueError, match="aggregate_time must be"): - convert_and_aggregate(cutout, identity_convert, aggregate_time=True) + _call(cutout, identity_convert, aggregate_time=True) diff --git a/test/test_creation.py b/test/test_creation.py index 52d95c7a..055b089a 100755 --- a/test/test_creation.py +++ b/test/test_creation.py @@ -50,12 +50,12 @@ def test_shape(ref): def test_extent(ref): reference_extent = [-4.125, 1.625, 55.875, 61.125] - assert all([x == y for x, y in zip(ref.extent, reference_extent)]) + assert all(x == y for x, y in zip(ref.extent, reference_extent, strict=False)) def test_bounds(ref): reference_extent = [-4.125, 55.875, 1.625, 61.125] - assert all([x == y for x, y in zip(ref.bounds, reference_extent)]) + assert all(x == y for x, y in zip(ref.bounds, reference_extent, strict=False)) def test_transform(ref): @@ -147,7 +147,7 @@ def test_dx_dy_dt(): ) assert dx == cutout.dx assert dy == cutout.dy - assert "h" == cutout.dt + assert cutout.dt == "h" def test_available_features(ref): diff --git a/test/test_dynamic_line_rating.py b/test/test_dynamic_line_rating.py index 58ea591d..e0e0b1ca 100644 --- a/test/test_dynamic_line_rating.py +++ b/test/test_dynamic_line_rating.py @@ -11,8 +11,10 @@ import numpy as np import pandas as pd +import pytest +from shapely.geometry import LineString, Point -from atlite.convert import convert_line_rating +from atlite.convert import convert_line_rating, line_azimuth_degrees def test_ieee_sample_case(): @@ -103,9 +105,7 @@ def test_suedkabel_sample_case(): def test_right_angle_in_different_configuration(): - """ - Test different configurations of angle difference of 90 degree. - """ + """Test different configurations of angle difference of 90 degree.""" ds = { "temperature": 313, "wnd100m": 0.61, @@ -149,9 +149,7 @@ def test_right_angle_in_different_configuration(): def test_angle_increase(): - """ - Test an increasing angle which should lead to an increasing capacity. - """ + """Test an increasing angle which should lead to an increasing capacity.""" ds = { "temperature": 313, "wnd100m": 0.61, @@ -180,3 +178,19 @@ def func(psi): # check point reflection assert np.isclose(res.iloc[:19], res.iloc[:17:-1], atol=1e-10, rtol=1e-10).all() assert np.isclose(res.iloc[:19], res.iloc[18:], atol=1e-10, rtol=1e-10).all() + + +@pytest.mark.parametrize( + ("start", "end", "expected"), + [ + ((0.0, 0.0), (0.0, 1.0), 180.0), # N-pointing line + ((0.0, 0.0), (0.0, -1.0), 0.0), # S-pointing line + ((0.0, 0.0), (1.0, 0.0), -90.0), # E-pointing + ((0.0, 0.0), (-1.0, 0.0), 90.0), # W-pointing + ((0.0, 0.0), (1.0, 1.0), -135.0), # NE diagonal + ], +) +def test_line_azimuth_degrees(start, end, expected): + """`line_azimuth_degrees` returns degrees consistent with `convert_line_rating`'s `psi`.""" + shape = LineString([Point(*start), Point(*end)]) + assert np.isclose(line_azimuth_degrees(shape), expected) diff --git a/test/test_gis.py b/test/test_gis.py index 23c28fe7..6d003f6b 100755 --- a/test/test_gis.py +++ b/test/test_gis.py @@ -72,7 +72,8 @@ def raster(tmp_path_factory): bounds = (X0, Y0, X1, Y1) # same as in test_gis.py res = 0.01 transform, shape = padded_transform_and_shape(bounds, res) - mask = np.random.rand(*shape) < raster_clip + rng = np.random.default_rng(42) + mask = rng.random(shape) < raster_clip mask = mask.astype(rio.int32) path = tmp_path / "raster.tif" with rio.open( @@ -96,7 +97,8 @@ def raster_reproject(tmp_path_factory): bounds = rio.warp.transform_bounds(4326, 3035, X0, Y0, X1, Y1) res = 1000 transform, shape = padded_transform_and_shape(bounds, res) - mask = np.random.rand(*shape) < raster_clip + rng = np.random.default_rng(42) + mask = rng.random(shape) < raster_clip mask = mask.astype(rio.int32) path = tmp_path / "raster_reproject.tif" with rio.open( @@ -120,7 +122,8 @@ def raster_codes(tmp_path_factory): bounds = (X0, Y0, X1, Y1) # same as in test_gis.py res = 0.01 transform, shape = padded_transform_and_shape(bounds, res) - mask = (np.random.rand(*shape) * 100).astype(int) + rng = np.random.default_rng(42) + mask = (rng.random(shape) * 100).astype(int) mask = mask.astype(rio.int32) path = tmp_path / "raster_codes.tif" with rio.open( @@ -139,9 +142,7 @@ def raster_codes(tmp_path_factory): def test_exclusioncontainer_repr(ref): - """ - Test ExclusionContainer.__repr__. - """ + """Test ExclusionContainer.__repr__.""" excluder = ExclusionContainer(ref.crs, res=0.01) assert "Exclusion Container" in repr(excluder) @@ -199,9 +200,7 @@ def test_open_closed_checks(ref, geometry, raster): def test_area(ref): - """ - Test the area of the cutout. - """ + """Test the area of the cutout.""" area = ref.area(crs=3035) assert isinstance(area, xr.DataArray) assert area.dims == ("y", "x") @@ -249,9 +248,7 @@ def test_bounds(ref): def test_regrid(): - """ - Test the atlite.gis.regrid function with average resampling. - """ + """Test the atlite.gis.regrid function with average resampling.""" # define blocks A = 0.25 B = 0.5 @@ -293,9 +290,7 @@ def test_regrid(): def test_pad_extent(): - """ - Test whether padding works with arrays of dimension > 2. - """ + """Test whether padding works with arrays of dimension > 2.""" src = np.ones((3, 2)) src_trans = rio.Affine(1, 0, 0, 0, 1, 0) dst_trans = rio.Affine(2, 0, 0, 0, 2, 0) @@ -379,9 +374,7 @@ def test_availability_matrix_flat_parallel_anonymous_function(ref, raster_codes) def test_availability_matrix_flat_wo_progressbar(ref): - """ - Same as `test_availability_matrix_flat` but without progressbar. - """ + """Same as `test_availability_matrix_flat` but without progressbar.""" shapes = gpd.GeoSeries( [box(X0 + 1, Y0 + 1, X1 - 1, Y1 - 1)], crs=ref.crs ).rename_axis("shape") @@ -410,9 +403,7 @@ def test_availability_matrix_flat_parallel_wo_progressbar(ref): def test_shape_availability_area(ref): - """ - Area of the mask and the shape must be close. - """ + """Area of the mask and the shape must be close.""" shapes = gpd.GeoSeries([box(X0 + 1, Y0 + 1, X1 - 1, Y1 - 1)], crs=ref.crs) res = 100 excluder = ExclusionContainer(res=res) @@ -481,9 +472,7 @@ def test_shape_availability_exclude_geometry(ref): def test_shape_availability_exclude_raster(ref, raster): - """ - When excluding the half of the geometry, the eligible area must be half. - """ + """When excluding the half of the geometry, the eligible area must be half.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 @@ -518,9 +507,7 @@ def test_shape_availability_exclude_raster(ref, raster): def test_shape_availability_excluder_partial_overlap(ref, raster): - """ - Test behavior, when a raster only overlaps half of the geometry. - """ + """Test behavior, when a raster only overlaps half of the geometry.""" bounds = X0 - 2, Y0, X0 + 2, Y1 area = abs((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) shapes = gpd.GeoSeries([box(*bounds)], crs=ref.crs) @@ -543,9 +530,7 @@ def test_shape_availability_excluder_partial_overlap(ref, raster): def test_shape_availability_excluder_raster_no_overlap(ref, raster): - """ - Check if the allow_no_overlap flag works. - """ + """Check if the allow_no_overlap flag works.""" bounds = X0 - 10.0, Y0 - 10.0, X0 - 2.0, Y0 - 2.0 area = abs((bounds[2] - bounds[0]) * (bounds[3] - bounds[1])) shapes = gpd.GeoSeries([box(*bounds)], crs=ref.crs) @@ -627,9 +612,7 @@ def test_availability_matrix_rastered_repro(ref, raster_reproject): def test_shape_availability_exclude_raster_codes(ref, raster_codes): - """ - Test exclusion of multiple raster codes. - """ + """Test exclusion of multiple raster codes.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 @@ -653,9 +636,7 @@ def test_shape_availability_exclude_raster_codes(ref, raster_codes): def test_plot_shape_availability(ref, raster): - """ - Test plotting of shape availability. - """ + """Test plotting of shape availability.""" shapes = gpd.GeoSeries([box(X0, Y0, X1, Y1)], crs=ref.crs) res = 0.01 diff --git a/test/test_preparation_and_conversion.py b/test/test_preparation_and_conversion.py index 1ed9bdf1..a9e698b7 100644 --- a/test/test_preparation_and_conversion.py +++ b/test/test_preparation_and_conversion.py @@ -12,13 +12,14 @@ import os import sys from datetime import date +from pathlib import Path import geopandas as gpd import numpy as np import pandas as pd import pytest import urllib3 -from dateutil.relativedelta import relativedelta +from dateutil.relativedelta import relativedelta # type: ignore[import-untyped] from shapely.geometry import LineString as Line from shapely.geometry import Point @@ -36,16 +37,12 @@ def all_notnull_test(cutout): - """ - Test if no nan's in the prepared data occur. - """ + """Test if no nan's in the prepared data occur.""" assert cutout.data.notnull().all() def prepared_features_test(cutout): - """ - The prepared features series should contain all variables in cutout.data. - """ + """Verify that prepared features contain all variables in cutout.data.""" assert set(cutout.prepared_features) == set(cutout.data) @@ -106,7 +103,8 @@ def pv_test(cutout, time=TIME, skip_optimal_sum_test=False): return_capacity=True, ) cap_per_region = ( - cells.assign(cap_factor=cap_factor.stack(spatial=["y", "x"]).values) + cells + .assign(cap_factor=cap_factor.stack(spatial=["y", "x"]).values) .groupby("regions") .cap_factor.sum() ) @@ -228,7 +226,7 @@ def csp_test(cutout): Test the atlite.Cutout.csp function with different for different settings and technologies. """ - ## Test technology = "solar tower" + # Test technology = "solar tower" st = cutout.csp(atlite.cspinstallations.SAM_solar_tower, capacity_factor=True) assert st.notnull().all() @@ -240,7 +238,7 @@ def csp_test(cutout): ll = cutout.csp(atlite.cspinstallations.lossless_installation) assert (st <= ll).all() - ## Test technology = "parabolic trough" + # Test technology = "parabolic trough" pt = cutout.csp(atlite.cspinstallations.SAM_parabolic_trough, capacity_factor=True) assert pt.notnull().all() @@ -254,27 +252,21 @@ def csp_test(cutout): def solar_thermal_test(cutout): - """ - Test the atlite.Cutout.solar_thermal function with different settings. - """ + """Test the atlite.Cutout.solar_thermal function with different settings.""" cap_factor = cutout.solar_thermal() assert cap_factor.notnull().all() assert cap_factor.sum() > 0 def heat_demand_test(cutout): - """ - Test the atlite.Cutout.heat_demand function with different settings. - """ + """Test the atlite.Cutout.heat_demand function with different settings.""" demand = cutout.heat_demand() assert demand.notnull().all() assert demand.sum() > 0 def soil_temperature_test(cutout): - """ - Test the atlite.Cutout.soil_temperature function with different settings. - """ + """Test the atlite.Cutout.soil_temperature function with different settings.""" demand = cutout.soil_temperature() assert demand.notnull().all() assert demand.sum() > 0 @@ -358,19 +350,17 @@ def runoff_test(cutout): def hydro_test(cutout): - """ - Test the atlite.Cutout.hydro function. - """ + """Test the atlite.Cutout.hydro function.""" plants = pd.DataFrame( cutout.grid.loc[[0], ["x", "y"]].values, columns=["lon", "lat"] ) basins = gpd.GeoDataFrame( - dict( - geometry=[cutout.grid.geometry[0]], - HYBAS_ID=[0], - DIST_MAIN=10, - NEXT_DOWN=None, - ), + { + "geometry": [cutout.grid.geometry[0]], + "HYBAS_ID": [0], + "DIST_MAIN": 10, + "NEXT_DOWN": None, + }, index=[0], crs=cutout.crs, ) @@ -386,9 +376,7 @@ def line_rating_test(cutout): def coefficient_of_performance_test(cutout): - """ - Test the coefficient_of_performance function. - """ + """Test the coefficient_of_performance function.""" cap_factor = cutout.coefficient_of_performance(source="air") assert cap_factor.notnull().all() assert cap_factor.sum() > 0 @@ -405,21 +393,17 @@ def test_data_module_arguments_era5(cutout_era5): All data variables should have an attribute to which module they belong. """ - for v in cutout_era5.data: + for _v in cutout_era5.data: assert cutout_era5.data.attrs["module"] == "era5" @staticmethod def test_all_non_na_era5(cutout_era5): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5.data).all() @staticmethod def test_all_non_na_era5_coarse(cutout_era5_coarse): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5_coarse.data).all() @staticmethod @@ -428,24 +412,18 @@ def test_all_non_na_era5_coarse(cutout_era5_coarse): reason="This test breaks on windows machine on CI due to unknown reasons.", ) def test_all_non_na_era5_weird_resolution(cutout_era5_weird_resolution): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_era5_weird_resolution.data).all() @staticmethod def test_dx_dy_preservation_era5(cutout_era5): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose(np.diff(cutout_era5.data.x), 0.25) assert np.allclose(np.diff(cutout_era5.data.y), 0.25) @staticmethod def test_dx_dy_preservation_era5_coarse(cutout_era5_coarse): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose( np.diff(cutout_era5_coarse.data.x), cutout_era5_coarse.data.attrs["dx"] ) @@ -459,9 +437,7 @@ def test_dx_dy_preservation_era5_coarse(cutout_era5_coarse): reason="This test breaks on windows machine on CI due to unknown reasons.", ) def test_dx_dy_preservation_era5_weird_resolution(cutout_era5_weird_resolution): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose( np.diff(cutout_era5_weird_resolution.data.x), cutout_era5_weird_resolution.data.attrs["dx"], @@ -510,9 +486,7 @@ def test_pv_tracking_era5(cutout_era5): @staticmethod def test_pv_era5_2days_crossing_months(cutout_era5_2days_crossing_months): - """ - See https://github.com/PyPSA/atlite/issues/256. - """ + """See https://github.com/PyPSA/atlite/issues/256.""" # noqa: DOC201 return pv_test(cutout_era5_2days_crossing_months, time="2013-03-01") @staticmethod @@ -532,6 +506,11 @@ def test_pv_era5_and_era5t(cutout_era5t): Note: the above page says that ERA5 data are made available with a *3* month delay, but experience shows that it's with a *2* month delay. Hence the test with previous vs. second-previous month. + + Returns + ------- + object + PV test result. """ today = date.today() first_day_this_month = today.replace(day=1) @@ -588,35 +567,27 @@ def test_line_rating_era5(cutout_era5): @pytest.mark.skipif( - not os.path.exists(SARAH_DIR), reason="'sarah_dir' is not a valid path" + not Path(SARAH_DIR).exists(), reason="'sarah_dir' is not a valid path" ) class TestSarah: @staticmethod def test_all_non_na_sarah(cutout_sarah): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah.data).all() @staticmethod def test_all_non_na_sarah_fine(cutout_sarah_fine): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah_fine.data).all() @staticmethod def test_all_non_na_sarah_weird_resolution(cutout_sarah_weird_resolution): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_sarah_weird_resolution.data).all() @staticmethod def test_dx_dy_preservation_sarah(cutout_sarah): - """ - The coordinates should be the same after preparation. - """ + """The coordinates should be the same after preparation.""" assert np.allclose(np.diff(cutout_sarah.data.x), 0.25) assert np.allclose(np.diff(cutout_sarah.data.y), 0.25) @@ -642,12 +613,10 @@ def test_runoff_sarah(cutout_sarah): @pytest.mark.skipif( - not os.path.exists(GEBCO_PATH), reason="'gebco_path' is not a valid path" + not Path(GEBCO_PATH).exists(), reason="'gebco_path' is not a valid path" ) class TestGebco: @staticmethod def test_all_non_na_gebco(cutout_gebco): - """ - Every cells should have data. - """ + """Every cells should have data.""" assert np.isfinite(cutout_gebco.data).all()