Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/api/pipeline/pipeline_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
.. autoclass:: pyearthtools.pipeline.modifications.TemporalRetrieval
:members:
.. autoclass:: pyearthtools.pipeline.modifications.TemporalWindow
:members:
:members:
.. autoclass:: pyearthtools.pipeline.modifications.idx_modification
:members:
```
Expand Down Expand Up @@ -159,7 +159,7 @@
:members:
.. autoclass:: pyearthtools.pipeline.operations.xarray.values.MaskValue
:members:
.. autoclass:: pyearthtools.pipeline.operations.xarray.values.ForceNormalised
.. autoclass:: pyearthtools.pipeline.operations.xarray.values.Clip
:members:
.. autoclass:: pyearthtools.pipeline.operations.xarray.values.Derive
:members:
Expand Down Expand Up @@ -247,7 +247,7 @@
:members:
.. autoclass:: pyearthtools.pipeline.operations.dask.values.MaskValue
:members:
.. autoclass:: pyearthtools.pipeline.operations.dask.values.ForceNormalised
.. autoclass:: pyearthtools.pipeline.operations.dask.values.Clip
:members:

.. autoclass:: pyearthtools.pipeline.operations.dask.normalisation.daskNormalisation
Expand Down Expand Up @@ -324,7 +324,7 @@
:members:
.. autoclass:: pyearthtools.pipeline.operations.numpy.values.MaskValue
:members:
.. autoclass:: pyearthtools.pipeline.operations.numpy.values.ForceNormalised
.. autoclass:: pyearthtools.pipeline.operations.numpy.values.Clip
:members:

.. autoclass:: pyearthtools.pipeline.operations.numpy.normalisation.numpyNormalisation
Expand Down
6 changes: 3 additions & 3 deletions docs/api/pipeline/pipeline_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ The rest of this page contains reference information for the components of the P
| | | - [OnCoordinate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnCoordinate) |
| `pipeline.operations.xarray.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.FillNan) |
| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.MaskValue) |
| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.ForceNormalised) |
| | | - [Clip](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.Clip) |
| | | - [Derive](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.Derive) |
| `pipeline.operations.xarray.metadata` | | - [Rename](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Rename) |
| | | - [Encoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Encoding) |
Expand Down Expand Up @@ -110,7 +110,7 @@ The rest of this page contains reference information for the components of the P
| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnSlice) |
| `pipeline.operations.dask.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.FillNan) |
| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.MaskValue) |
| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.ForceNormalised) |
| | | - [Clip](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.Clip) |
| `pipeline.operations.dask.normalisation` | | - [daskNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.daskNormalisation) |
| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Anomaly) |
| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Deviation) |
Expand Down Expand Up @@ -143,7 +143,7 @@ The rest of this page contains reference information for the components of the P
| | | - [HSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.HSplit) |
| `pipeline.operations.numpy.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.FillNan) |
| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.MaskValue) |
| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.ForceNormalised) |
| | | - [Clip](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.Clip) |
| `pipeline.operations.numpy.normalisation` | | - [numpyNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.numpyNormalisation) |
| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Anomaly) |
| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Deviation) |
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/pipeline/Operations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@
"| select | Select elements from dataset's | `SelectDataset`, `DropDataset`, `SliceDataset` |\n",
"| sort | Sort variables of a dataset | `Sort` |\n",
"| split | Split datasets | `OnVariables`, `OnCoordinate` |\n",
"| values | Modify values of datasets | `FillNan`, `MaskValue`, `ForceNormalised`, `Derive` |\n",
"| values | Modify values of datasets | `FillNan`, `MaskValue`, `Clip`, `Derive` |\n",
"| remapping | Reproject data | `HEALPix` | "
]
},
Expand Down
16 changes: 9 additions & 7 deletions packages/data/src/pyearthtools/data/transforms/derive.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,22 +390,24 @@ def derive_equations(
attrs["equation"] = eq

LOG.debug(f"Setting {key!r} to result of {eq!r}.")
result, eq_drop_vars = _evaluate(eq, dataset=dataset)
# shallow copy dataset, so new variables aren't added to old dataset
dataset_copy = dataset.copy(deep=False)
result, eq_drop_vars = _evaluate(eq, dataset=dataset_copy)

if key in list(dataset.coords.keys()):
dataset = dataset.assign_coords({key: result})
if key in list(dataset_copy.coords.keys()):
dataset_copy = dataset_copy.assign_coords({key: result})
else:
dataset[key] = result
dataset_copy[key] = result

if attrs.pop("drop", drop):
_ = list(drop_vars.append(var) for var in eq_drop_vars)

dataset[key].attrs.update(**attrs)
dataset_copy[key].attrs.update(**attrs)

# Drop variables used in the calculation
dataset = dataset.drop(set(drop_vars).intersection(dataset.data_vars), errors="ignore") # type: ignore
dataset_copy = dataset_copy.drop_vars(set(drop_vars).intersection(dataset_copy.data_vars), errors="ignore") # type: ignore

return dataset
return dataset_copy


class Derive(Transform):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,14 @@ def __init__(
Args:
nan (float, optional):
Value to fill nan's with.
If no value is passed then NaN values will not be replaced. Defaults to 0.
If None is passed then NaN values will be replaced with 0. Defaults to 0.
posinf (float, optional):
Value to be used to fill positive infinity values,
If no value is passed then positive infinity values will be replaced with a very large number. Defaults to None.
If None is passed then positive infinity values will be replaced with a very large number. Defaults to None.
neginf (float, optional):
Value to be used to fill negative infinity values,
If no value is passed then negative infinity values will be replaced with a very small (or negative) number. Defaults to None.
If None is passed then negative infinity values will be replaced with a very small (or negative) number. Defaults to None.
"""
raise NotImplementedError("Not implemented")

super().__init__(
operation="apply",
Expand All @@ -69,7 +68,7 @@ def __init__(
self.neginf = neginf

def apply_func(self, sample: da.Array):
return da.nan_to_num(da.array(sample), self.nan, self.posinf, self.neginf)
return da.nan_to_num(da.array(sample), True, self.nan, self.posinf, self.neginf)


class MaskValue(DaskOperation):
Expand Down Expand Up @@ -118,7 +117,7 @@ def __init__(
self.value = value
self.replacement_value = replacement_value

self._mask_transform = pyearthtools.data.transforms.mask.replace_value(value, operation, replacement_value)
self._mask_transform = pyearthtools.data.transforms.mask.Replace(value, operation, replacement_value)

def apply_func(self, sample: da.Array) -> da.Array:
"""
Expand All @@ -135,9 +134,9 @@ def apply_func(self, sample: da.Array) -> da.Array:
return self._mask_transform(sample) # type: ignore


class ForceNormalised(DaskOperation):
class Clip(DaskOperation):
"""
Operation to force data within a certain range, by default 0 & 1
Operation to force data to be within a certain range, by default 0 & 1
"""

_override_interface = ["Serial"]
Expand Down Expand Up @@ -170,10 +169,8 @@ def __init__(

self.record_initialisation()

self._force_min = MaskValue(min_value, "<", min_value) if min_value is not None else None
self._force_max = MaskValue(max_value, ">", max_value) if max_value is not None else None
self._min_value = min_value
self._max_value = max_value

def apply_func(self, sample):
for func in (func for func in [self._force_min, self._force_max] if func is not None):
sample = func.apply_func(sample)
return sample
return da.clip(sample, self._min_value, self._max_value)
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ def __init__(
Args:
nan (float, optional):
Value to fill nan's with.
If no value is passed then NaN values will not be replaced. Defaults to 0.
If None is passed then NaN values will be replaced with 0. Defaults to 0.
posinf (float, optional):
Value to be used to fill positive infinity values,
If no value is passed then positive infinity values will be replaced with a very large number. Defaults to None.
If None is passed then positive infinity values will be replaced with a very large number. Defaults to None.
neginf (float, optional):
Value to be used to fill negative infinity values,
If no value is passed then negative infinity values will be replaced with a very small (or negative) number. Defaults to None.
If None is passed then negative infinity values will be replaced with a very small (or negative) number. Defaults to None.
"""

super().__init__(
Expand Down Expand Up @@ -112,7 +112,7 @@ def __init__(
self.value = value
self.replacement_value = replacement_value

self._mask_transform = pyearthtools.data.transforms.mask.replace_value(value, operation, replacement_value)
self._mask_transform = pyearthtools.data.transforms.mask.Replace(value, operation, replacement_value)

def apply_func(self, sample: np.ndarray) -> np.ndarray:
"""
Expand All @@ -129,9 +129,9 @@ def apply_func(self, sample: np.ndarray) -> np.ndarray:
return self._mask_transform(sample) # type: ignore


class ForceNormalised(Operation):
class Clip(Operation):
"""
Operation to force data within a certain range, by default 0 & 1
Operation to force data to be within a certain range, by default 0 & 1
"""

_override_interface = ["Delayed", "Serial"]
Expand Down Expand Up @@ -164,10 +164,8 @@ def __init__(

self.record_initialisation()

self._force_min = MaskValue(min_value, "<", min_value) if min_value is not None else None
self._force_max = MaskValue(max_value, ">", max_value) if max_value is not None else None
self._min_value = min_value
self._max_value = max_value

def apply_func(self, sample):
for func in (func for func in [self._force_min, self._force_max] if func is not None):
sample = func.apply_func(sample)
return sample
return np.clip(sample, a_min=self._min_value, a_max=self._max_value)
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ def __init__(
Args:
nan (float, optional):
Value to fill nan's with.
If no value is passed then NaN values will not be replaced. Defaults to 0.
If None is passed then NaN values will be replaced with 0. Defaults to 0.
posinf (float, optional):
Value to be used to fill positive infinity values,
If no value is passed then positive infinity values will be replaced with a very large number. Defaults to None.
If None is passed then positive infinity values will be replaced with a very large number. Defaults to None.
neginf (float, optional):
Value to be used to fill negative infinity values,
If no value is passed then negative infinity values will be replaced with a very small (or negative) number. Defaults to None.
If None is passed then negative infinity values will be replaced with a very small (or negative) number. Defaults to None.
"""

super().__init__(
Expand All @@ -67,7 +67,25 @@ def __init__(
self.neginf = neginf

def apply_func(self, sample: T) -> T:
return sample.fillna(self.nan)

# TODO: #239 remove superfluous type checks.
if not (isinstance(sample, xr.DataArray) or isinstance(sample, xr.Dataset)):
raise TypeError("sample must be xr.DataArray or xr.Dataset.")

# create copy of input, with np.nan_to_num applied to underlying numpy arrays
if isinstance(sample, xr.DataArray):
return sample.copy(
deep=True, # since data is provided, deep copy only applies to coordinates
data=np.nan_to_num(sample.values, nan=self.nan, posinf=self.posinf, neginf=self.neginf),
)
else:
return sample.copy(
deep=True,
data={
k: np.nan_to_num(v.values, nan=self.nan, posinf=self.posinf, neginf=self.neginf)
for k, v in sample.items()
},
)


class MaskValue(Operation):
Expand Down Expand Up @@ -132,9 +150,9 @@ def apply_func(self, sample: T) -> T:
return self._mask_transform(sample)


class ForceNormalised(Operation):
class Clip(Operation):
"""
Operation to force data within a certain range, by default 0 & 1
Operation to force data to be within a certain range, by default 0 & 1
"""

_override_interface = "Serial"
Expand Down Expand Up @@ -166,13 +184,11 @@ def __init__(

self.record_initialisation()

self._force_min = MaskValue(min_value, "<", min_value) if min_value is not None else None
self._force_max = MaskValue(max_value, ">", max_value) if max_value is not None else None
self._min_value = min_value
self._max_value = max_value

def apply_func(self, sample):
for func in (func for func in [self._force_min, self._force_max] if func is not None):
sample = func.apply_func(sample)
return sample
return sample.clip(min=self._min_value, max=self._max_value)


class Derive(Operation):
Expand Down Expand Up @@ -204,7 +220,7 @@ def __init__(
**derivations (Union[str, tuple[str, dict[str, Any]]]):
Kwarg form of `derivation`.
"""
super().__init__(split_tuples=True, recursively_split_tuples=True, recognised_types=(xr.DataArray, xr.Dataset))
super().__init__(split_tuples=True, recursively_split_tuples=True, recognised_types=(xr.Dataset,))
self.record_initialisation()

derivation = derivation or {}
Expand Down
Loading