Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.0-rc.1 (2026-03-30)

* feat: compressor data ([1685d7d](https://github.com/sequential-parameter-optimization/spotdesirability/commit/1685d7d))
* ci: install spotdesirability jupyter kernel using --sys-prefix instead of --user ([d5139de](https://github.com/sequential-parameter-optimization/spotdesirability/commit/d5139de))
* ci: remove explicit jupyter kernel dependencies to fix CI kernel resolution ([18402c5](https://github.com/sequential-parameter-optimization/spotdesirability/commit/18402c5))
* Update uv.lock ([815d778](https://github.com/sequential-parameter-optimization/spotdesirability/commit/815d778))
## <small>0.0.27 (2026-02-28)</small>

* Merge branch 'main' into develop ([1b23893](https://github.com/sequential-parameter-optimization/spotdesirability/commit/1b23893))
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include src/spotdesirability/datasets *.csv
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "uv_build"

[project]
name = "spotdesirability"
version = "0.0.27"
version = "0.1.0rc1"
authors = [
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
]
Expand Down
43 changes: 42 additions & 1 deletion src/spotdesirability/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,51 @@
import os
import importlib.resources as pkg_resources
from typing import Tuple

import pandas as pd

def get_data_folder_path():

def get_data_folder_path() -> str:
    """Return the absolute path of the ``data`` folder next to this module.

    The path is derived from this module's own location; it is only
    computed, not checked for existence.

    Returns:
        str: Absolute path ``<directory of this file>/data``.
    """
    # The folder is expected to sit directly beside this source file.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, "data")


def load_compressor_data() -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the normalized compressor datasets X and Z.

    Reads ``df_x_normalized.csv`` and ``df_z_normalized.csv`` from the
    ``spotdesirability.datasets`` package through ``importlib.resources``
    and parses each file with ``pandas.read_csv``. The datasets represent
    the values of $X$ and $Z$ respectively.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: A tuple ``(df_x, df_z)`` holding
            the X and Z datasets as pandas DataFrames, in that order.

    Raises:
        FileNotFoundError: If one of the CSV files cannot be found. The
            original exception is re-raised unchanged.
        RuntimeError: If anything else goes wrong while locating or parsing
            the data (for example the ``spotdesirability.datasets`` package
            cannot be imported, or a CSV is malformed). The underlying
            exception is attached as ``__cause__``.

    Example:
        ```python
        import spotdesirability.data_utils as du

        df_x, df_z = du.load_compressor_data()
        ```
    """
    try:
        # Resolve the package's resource root once and derive both files
        # from it.
        datasets = pkg_resources.files("spotdesirability.datasets")
        x_path = datasets / "df_x_normalized.csv"
        z_path = datasets / "df_z_normalized.csv"

        # as_file() yields a real filesystem path even when the package is
        # not stored as plain files (e.g. inside a zip archive).
        with pkg_resources.as_file(x_path) as p_x, pkg_resources.as_file(z_path) as p_z:
            df_x = pd.read_csv(p_x)
            df_z = pd.read_csv(p_z)
    except FileNotFoundError:
        # Missing data files are part of the documented contract; a bare
        # ``raise`` propagates the original exception untouched.
        raise
    except Exception as err:
        raise RuntimeError(f"Error loading compressor datasets: {err}") from err

    return df_x, df_z
214 changes: 214 additions & 0 deletions src/spotdesirability/datasets/df_x_normalized.csv

Large diffs are not rendered by default.

214 changes: 214 additions & 0 deletions src/spotdesirability/datasets/df_z_normalized.csv

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions tests/test_data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pandas as pd
import spotdesirability.data_utils as du

def test_load_compressor_data():
    """Check that load_compressor_data yields two non-empty pandas DataFrames."""
    df_x, df_z = du.load_compressor_data()
    labelled = (("df_x", df_x), ("df_z", df_z))

    # Both results must be DataFrames.
    for label, frame in labelled:
        assert isinstance(frame, pd.DataFrame), f"Expected {label} to be a pandas DataFrame."

    # Neither frame may be empty.
    for label, frame in labelled:
        assert not frame.empty, f"{label} DataFrame is empty."

    # At least one column each, as a sign that the CSV was parsed successfully.
    for label, frame in labelled:
        assert len(frame.columns) > 0, f"{label} has no columns."
Loading