From 0dcda76de6d46523adba0df48403463c551274f2 Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 16:06:18 -0700 Subject: [PATCH 01/10] Specify date string format to suppress warning --- solarfarmer/models/energy_calculation_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solarfarmer/models/energy_calculation_results.py b/solarfarmer/models/energy_calculation_results.py index c9192a3..40a8199 100644 --- a/solarfarmer/models/energy_calculation_results.py +++ b/solarfarmer/models/energy_calculation_results.py @@ -1776,7 +1776,7 @@ def _handle_pvsyst_results( if _PANDAS: with io.StringIO(pvsyst_results_text) as g: data = pd.read_csv(g, sep=";", skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12]) - data["date"] = pd.to_datetime(data["date"], utc=True).dt.tz_localize(None) + data["date"] = pd.to_datetime(data["date"], format="%d/%m/%y %H:%M", utc=True).dt.tz_localize(None) data.set_index("date", inplace=True) data.index = data.index.sort_values() return data From c0f13d4485e75296df47964706c5b9cc3f9171aa Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 16:07:08 -0700 Subject: [PATCH 02/10] New tests for handlers of timeseries in modelchain results --- tests/test_handle_timeseries_parsing.py | 139 ++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/test_handle_timeseries_parsing.py diff --git a/tests/test_handle_timeseries_parsing.py b/tests/test_handle_timeseries_parsing.py new file mode 100644 index 0000000..7ede92a --- /dev/null +++ b/tests/test_handle_timeseries_parsing.py @@ -0,0 +1,139 @@ +""" +Unit tests for _handle_pvsyst_results, _handle_losstree_results, +and _handle_timeseries_results. + +Tests the parsing/transformation layer using synthetic data that +matches the formats produced by SF-Core. +""" + +import warnings + +import pandas as pd +import pytest + +from solarfarmer.models.energy_calculation_results import ( + _handle_losstree_results, + _handle_pvsyst_results, + _handle_timeseries_results, +) +from solarfarmer.models.model_chain_response import ModelChainResponse + + +# --- Synthetic data matching SF-Core output formats --- + +# PVsyst CSV: 10 header rows skipped (indices 0-9), column row, units row +# (index 11 skipped), blank row (index 12 skipped), then data. +# Separator: semicolon. Date format: dd/MM/yy HH:mm +PVSYST_CSV = ( + "SF-Core 0.4.443\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "Simulation date;;30/04/26 14h00\n" + "\n" + "\n" + "\n" + "date;GlobHor;T_Amb;WindVel\n" + "-;kWh/m2;deg. C;m/s\n" + "\n" + "01/06/24 06:00;0.15;18.3;2.1\n" + "15/06/24 12:00;0.85;25.1;3.0\n" + "01/01/24 00:00;0.0;-2.5;5.4\n" + "31/12/24 23:00;0.0;5.2;4.1\n" +) + +# Loss tree TSV: 2 header rows (descriptive + blank), then column headers, +# then data. Separator: tab. Date format: yyyy-MM-ddTHH:mm:sszzz (ISO 8601) +LOSSTREE_TSV = ( + "Loss tree results for each time-step. Each value is an energy in kWh" + " for a certain step in the calculation\n" + "\n" + "Start of period\tGHI\tGlobalEffective\tNetEnergy\n" + "2024-06-01T06:00:00+00:00\t0.150\t0.120\t0.100\n" + "2024-06-01T07:00:00+00:00\t0.450\t0.400\t0.380\n" + "2024-06-01T08:00:00+00:00\t0.720\t0.680\t0.650\n" +) + +# Detailed TSV: single header row then data rows. Separator: tab. +# First columns are StartOfPeriod and PeriodInMinutes. +DETAILED_TSV = ( + "StartOfPeriod\tPeriodInMinutes\tGHI\tDHI\tTAmb\n" + "2024-06-01T06:00:00+00:00\t60\t150\t80\t18.3\n" + "2024-06-01T07:00:00+00:00\t60\t450\t200\t19.1\n" + "2024-06-01T08:00:00+00:00\t60\t720\t300\t20.5\n" +) + + +class TestHandlePvsystResults: + """Tests for _handle_pvsyst_results with PVsyst-format CSV data.""" + + def test_parses_pvsyst_csv_to_dataframe(self, tmp_path): + """Parsing a valid PVsyst CSV returns a DataFrame with expected shape and columns.""" + response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) + df = _handle_pvsyst_results(response, tmp_path, save_outputs=False) + + assert isinstance(df, pd.DataFrame) + assert len(df) == 4 + assert list(df.columns) == ["GlobHor", "T_Amb", "WindVel"] + + def test_dates_parsed_day_first(self, tmp_path): + """Date column uses dd/MM/yy format; 01/06 must be June 1st, not Jan 6th.""" + response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) + df = _handle_pvsyst_results(response, tmp_path, save_outputs=False) + + # 01/06/24 = June 1st (day > 12 in 31/12/24 proves day-first) + assert df.index[0] == pd.Timestamp("2024-01-01 00:00:00") + assert df.index[-1] == pd.Timestamp("2024-12-31 23:00:00") + # Index is sorted + assert df.index.is_monotonic_increasing + + def test_no_warning_emitted(self, tmp_path): + """Parsing must not emit UserWarning about dateutil fallback (SM-326).""" + response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) + with warnings.catch_warnings(): + warnings.simplefilter("error") + _handle_pvsyst_results(response, tmp_path, save_outputs=False) + + def test_saves_file_when_requested(self, tmp_path): + """With save_outputs=True, the raw CSV is written to disk.""" + response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) + _handle_pvsyst_results(response, tmp_path, save_outputs=True) + + saved = list(tmp_path.glob("*")) + assert len(saved) == 1 + + +class TestHandleLosstreeResults: + """Tests for _handle_losstree_results with loss-tree TSV data.""" + + def test_parses_losstree_tsv_to_dataframe(self, tmp_path): + """Parsing a valid loss-tree TSV returns a DataFrame with expected shape.""" + response = ModelChainResponse(LossTreeResults=LOSSTREE_TSV) + df = _handle_losstree_results(response, tmp_path, save_outputs=False) + + assert isinstance(df, pd.DataFrame) + assert len(df) == 3 + assert list(df.columns) == ["GHI", "GlobalEffective", "NetEnergy"] + assert df.index.name == "Start of period" + + def test_iso_dates_parsed_correctly(self, tmp_path): + """ISO 8601 timestamps are parsed to correct datetime values.""" + response = ModelChainResponse(LossTreeResults=LOSSTREE_TSV) + df = _handle_losstree_results(response, tmp_path, save_outputs=False) + + assert df.index[0] == pd.Timestamp("2024-06-01 06:00:00", tz="UTC") + + +class TestHandleTimeseriesResults: + """Tests for _handle_timeseries_results (detailed format) with TSV data.""" + + def test_parses_detailed_tsv_to_dataframe(self, tmp_path): + """Parsing a valid detailed TSV returns a DataFrame with expected columns.""" + response = ModelChainResponse(ResultsFile=DETAILED_TSV) + df = _handle_timeseries_results(response, tmp_path, save_outputs=False) + + assert isinstance(df, pd.DataFrame) + assert len(df) == 3 + assert list(df.columns) == ["StartOfPeriod", "PeriodInMinutes", "GHI", "DHI", "TAmb"] From 2bb92fd2bcde6ad07f7c08aa3e22bdaf77442362 Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 21:51:09 -0700 Subject: [PATCH 03/10] Fix datetimeindex sorting Co-authored-by: Copilot --- solarfarmer/models/energy_calculation_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solarfarmer/models/energy_calculation_results.py b/solarfarmer/models/energy_calculation_results.py index 40a8199..7640016 100644 --- a/solarfarmer/models/energy_calculation_results.py +++ b/solarfarmer/models/energy_calculation_results.py @@ -1723,7 +1723,7 @@ def _handle_losstree_results( data = pd.read_csv(g, sep="\t", skiprows=[0, 1]) data["Start of period"] = pd.to_datetime(data["Start of period"]) data.set_index("Start of period", inplace=True) - data.index = data.index.sort_values() + data.sort_index(inplace=True) return data else: warnings.warn( @@ -1778,7 +1778,7 @@ def _handle_pvsyst_results( data = pd.read_csv(g, sep=";", skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12]) data["date"] = pd.to_datetime(data["date"], format="%d/%m/%y %H:%M", utc=True).dt.tz_localize(None) data.set_index("date", inplace=True) - data.index = data.index.sort_values() + data.sort_index(inplace=True) return data else: warnings.warn( From 9dcab1a18d6af993f9b46660ab329883564de7cf Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 21:51:43 -0700 Subject: [PATCH 04/10] Update tests --- tests/test_handle_timeseries_parsing.py | 30 ++++++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tests/test_handle_timeseries_parsing.py b/tests/test_handle_timeseries_parsing.py index 7ede92a..8a050e3 100644 --- a/tests/test_handle_timeseries_parsing.py +++ b/tests/test_handle_timeseries_parsing.py @@ -9,7 +9,6 @@ import warnings import pandas as pd -import pytest from solarfarmer.models.energy_calculation_results import ( _handle_losstree_results, @@ -83,11 +82,20 @@ def test_dates_parsed_day_first(self, tmp_path): response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) df = _handle_pvsyst_results(response, tmp_path, save_outputs=False) - # 01/06/24 = June 1st (day > 12 in 31/12/24 proves day-first) - assert df.index[0] == pd.Timestamp("2024-01-01 00:00:00") - assert df.index[-1] == pd.Timestamp("2024-12-31 23:00:00") - # Index is sorted - assert df.index.is_monotonic_increasing + # 01/06/24 must parse as June 1st, not January 6th + assert pd.Timestamp("2024-06-01 06:00:00") in df.index + # 31/12/24 would blow up with month=31 if parsed month-first + assert pd.Timestamp("2024-12-31 23:00:00") in df.index + + def test_sort_preserves_data_alignment(self, tmp_path): + """Sorting the index must keep data rows aligned with their timestamps.""" + response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) + df = _handle_pvsyst_results(response, tmp_path, save_outputs=False) + + # Input row "01/01/24 00:00" has GlobHor=0.0 and T_Amb=-2.5 + jan_row = df.loc[pd.Timestamp("2024-01-01 00:00:00")] + assert jan_row["GlobHor"] == 0.0 + assert jan_row["T_Amb"] == -2.5 def test_no_warning_emitted(self, tmp_path): """Parsing must not emit UserWarning about dateutil fallback (SM-326).""" @@ -103,6 +111,7 @@ def test_saves_file_when_requested(self, tmp_path): saved = list(tmp_path.glob("*")) assert len(saved) == 1 + assert saved[0].name == "PVsystResults.csv" class TestHandleLosstreeResults: @@ -125,6 +134,15 @@ def test_iso_dates_parsed_correctly(self, tmp_path): assert df.index[0] == pd.Timestamp("2024-06-01 06:00:00", tz="UTC") + def test_sort_preserves_data_alignment(self, tmp_path): + """Sorting the index must keep data rows aligned with their timestamps.""" + response = ModelChainResponse(LossTreeResults=LOSSTREE_TSV) + df = _handle_losstree_results(response, tmp_path, save_outputs=False) + + # First row in sorted output should have GHI=0.150 + assert df.iloc[0]["GHI"] == 0.150 + assert df.index[0] == pd.Timestamp("2024-06-01 06:00:00", tz="UTC") + class TestHandleTimeseriesResults: """Tests for _handle_timeseries_results (detailed format) with TSV data.""" From 830dc0838ed93cfce5d7650044e56468074d8466 Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 22:02:26 -0700 Subject: [PATCH 05/10] Add pandas to test dependency group Co-authored-by: Copilot --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 7c07bb7..b980832 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ docs = [ test = [ "pytest>=7.0", + "pandas>=2.0", ] dev = [ From 02b8fb3ccee7008cfebbec621685a4830ab5286e Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 22:05:17 -0700 Subject: [PATCH 06/10] Linting --- tests/test_handle_timeseries_parsing.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/test_handle_timeseries_parsing.py b/tests/test_handle_timeseries_parsing.py index 8a050e3..ee8970c 100644 --- a/tests/test_handle_timeseries_parsing.py +++ b/tests/test_handle_timeseries_parsing.py @@ -1,11 +1,3 @@ -""" -Unit tests for _handle_pvsyst_results, _handle_losstree_results, -and _handle_timeseries_results. - -Tests the parsing/transformation layer using synthetic data that -matches the formats produced by SF-Core. -""" - import warnings import pandas as pd @@ -17,7 +9,6 @@ ) from solarfarmer.models.model_chain_response import ModelChainResponse - # --- Synthetic data matching SF-Core output formats --- # PVsyst CSV: 10 header rows skipped (indices 0-9), column row, units row From 623998ddd6a7d06aaaf8f766ce25561b5aeeb41f Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Thu, 30 Apr 2026 22:07:27 -0700 Subject: [PATCH 07/10] Linting --- solarfarmer/models/energy_calculation_results.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solarfarmer/models/energy_calculation_results.py b/solarfarmer/models/energy_calculation_results.py index 7640016..632b2d7 100644 --- a/solarfarmer/models/energy_calculation_results.py +++ b/solarfarmer/models/energy_calculation_results.py @@ -1776,7 +1776,9 @@ def _handle_pvsyst_results( if _PANDAS: with io.StringIO(pvsyst_results_text) as g: data = pd.read_csv(g, sep=";", skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12]) - data["date"] = pd.to_datetime(data["date"], format="%d/%m/%y %H:%M", utc=True).dt.tz_localize(None) + data["date"] = pd.to_datetime( + data["date"], format="%d/%m/%y %H:%M", utc=True + ).dt.tz_localize(None) data.set_index("date", inplace=True) data.sort_index(inplace=True) return data From cedca61790067f1aa1aa91d9157b95f245deb1d9 Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Mon, 4 May 2026 10:55:04 -0700 Subject: [PATCH 08/10] Replace all uses of inplace=True Co-authored-by: Copilot --- solarfarmer/models/energy_calculation_results.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/solarfarmer/models/energy_calculation_results.py b/solarfarmer/models/energy_calculation_results.py index 632b2d7..048f44c 100644 --- a/solarfarmer/models/energy_calculation_results.py +++ b/solarfarmer/models/energy_calculation_results.py @@ -1721,9 +1721,8 @@ def _handle_losstree_results( if _PANDAS: with io.StringIO(loss_tree_results_text) as g: data = pd.read_csv(g, sep="\t", skiprows=[0, 1]) - data["Start of period"] = pd.to_datetime(data["Start of period"]) - data.set_index("Start of period", inplace=True) - data.sort_index(inplace=True) + data["Start of period"] = pd.to_datetime(data["Start of period"]) + data = data.set_index("Start of period").sort_index() return data else: warnings.warn( @@ -1779,8 +1778,7 @@ def _handle_pvsyst_results( data["date"] = pd.to_datetime( data["date"], format="%d/%m/%y %H:%M", utc=True ).dt.tz_localize(None) - data.set_index("date", inplace=True) - data.sort_index(inplace=True) + data = data.set_index("date").sort_index() return data else: warnings.warn( From 7ab8f6327f40fecec98b6fc8780cce0dbc9ea47b Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Mon, 4 May 2026 11:55:47 -0700 Subject: [PATCH 09/10] Parse PVsyst timestamps as site-local time, not UTC Co-authored-by: Copilot --- solarfarmer/models/energy_calculation_results.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/solarfarmer/models/energy_calculation_results.py b/solarfarmer/models/energy_calculation_results.py index 048f44c..30b9915 100644 --- a/solarfarmer/models/energy_calculation_results.py +++ b/solarfarmer/models/energy_calculation_results.py @@ -1775,9 +1775,8 @@ def _handle_pvsyst_results( if _PANDAS: with io.StringIO(pvsyst_results_text) as g: data = pd.read_csv(g, sep=";", skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12]) - data["date"] = pd.to_datetime( - data["date"], format="%d/%m/%y %H:%M", utc=True - ).dt.tz_localize(None) + # Timestamps are site-local time as returned by the SF API. + data["date"] = pd.to_datetime(data["date"], format="%d/%m/%y %H:%M") data = data.set_index("date").sort_index() return data else: From 145ca27a5db83da396f9880146cde7fc50a49197 Mon Sep 17 00:00:00 2001 From: Ian Tse Date: Mon, 4 May 2026 11:57:26 -0700 Subject: [PATCH 10/10] Add test to assert PVsyst index is tz-naive (site-local time) --- tests/test_handle_timeseries_parsing.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_handle_timeseries_parsing.py b/tests/test_handle_timeseries_parsing.py index ee8970c..cae44fd 100644 --- a/tests/test_handle_timeseries_parsing.py +++ b/tests/test_handle_timeseries_parsing.py @@ -88,6 +88,13 @@ def test_sort_preserves_data_alignment(self, tmp_path): assert jan_row["GlobHor"] == 0.0 assert jan_row["T_Amb"] == -2.5 + def test_index_is_timezone_naive(self, tmp_path): + """Index must be tz-naive: the SF API returns site-local time.""" + response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV) + df = _handle_pvsyst_results(response, tmp_path, save_outputs=False) + + assert df.index.tz is None + def test_no_warning_emitted(self, tmp_path): """Parsing must not emit UserWarning about dateutil fallback (SM-326).""" response = ModelChainResponse(PvSystFormatResultsFile=PVSYST_CSV)