diff --git a/src/osekit/core/audio_dataset.py b/src/osekit/core/audio_dataset.py index 388d173b..32fe8c72 100644 --- a/src/osekit/core/audio_dataset.py +++ b/src/osekit/core/audio_dataset.py @@ -176,7 +176,7 @@ def _data_from_dict(cls, dictionary: dict) -> list[AudioData]: def from_folder( # noqa: PLR0913 cls, folder: Path, - strptime_format: str | None, + strptime_format: str | list[str] | None, begin: Timestamp | None = None, end: Timestamp | None = None, timezone: str | pytz.timezone | None = None, @@ -195,7 +195,7 @@ def from_folder( # noqa: PLR0913 ---------- folder: Path The folder containing the audio files. - strptime_format: str | None + strptime_format: str | list[str] | None The strptime format used in the filenames. It should use valid strftime codes (https://strftime.org/). If ``None``, the first audio file of the folder will start diff --git a/src/osekit/core/audio_file.py b/src/osekit/core/audio_file.py index 619b3361..0c133f93 100644 --- a/src/osekit/core/audio_file.py +++ b/src/osekit/core/audio_file.py @@ -45,7 +45,7 @@ def __init__( If it is not provided, ``strptime_format`` is mandatory. If both ``begin`` and ``strptime_format`` are provided, ``begin`` will overrule the timestamp embedded in the filename. - strptime_format: str | None + strptime_format: str | list[str] | None The strptime format used in the text. It should use valid strftime codes (https://strftime.org/). Example: ``'%y%m%d_%H:%M:%S'``. diff --git a/src/osekit/core/base_dataset.py b/src/osekit/core/base_dataset.py index 33a749d4..3ffeebfb 100644 --- a/src/osekit/core/base_dataset.py +++ b/src/osekit/core/base_dataset.py @@ -511,7 +511,7 @@ def _get_data_from_files_timedelta_file( def from_folder( # noqa: PLR0913 cls: type[Self], folder: Path, - strptime_format: str | None, + strptime_format: str | list[str] | None, begin: Timestamp | None = None, end: Timestamp | None = None, timezone: str | pytz.timezone | None = None, @@ -528,7 +528,7 @@ def from_folder( # noqa: PLR0913 ---------- folder: Path The folder containing the files. - strptime_format: str | None + strptime_format: str | list[str] | None The strptime format used in the filenames. It should use valid strftime codes (https://strftime.org/). If None, the first audio file of the folder will start @@ -619,7 +619,7 @@ def from_folder( # noqa: PLR0913 def _parse_file( cls: type[Self], file: Path, - strptime_format: str, + strptime_format: str | list[str] | None, timezone: str | pytz.timezone | None, begin_timestamp: Timestamp, valid_files: list[TFile], diff --git a/src/osekit/core/base_file.py b/src/osekit/core/base_file.py index 08f32402..f3df8fc4 100644 --- a/src/osekit/core/base_file.py +++ b/src/osekit/core/base_file.py @@ -62,7 +62,7 @@ def __init__( ``begin`` will overrule the timestamp embedded in the filename. end: pandas.Timestamp | None (Optional) timestamp after the last data point in the file. - strptime_format: str | None + strptime_format: str | list[str] | None The strptime format used in the text. It should use valid strftime codes (https://strftime.org/). Example: ``'%y%m%d_%H:%M:%S'``. diff --git a/src/osekit/core/spectro_dataset.py b/src/osekit/core/spectro_dataset.py index e962e6d3..71415bb5 100644 --- a/src/osekit/core/spectro_dataset.py +++ b/src/osekit/core/spectro_dataset.py @@ -527,7 +527,7 @@ def from_dict(cls, dictionary: dict) -> SpectroDataset: def from_folder( # noqa: PLR0913 cls, folder: Path, - strptime_format: str, + strptime_format: str | list[str] | None, begin: Timestamp | None = None, end: Timestamp | None = None, timezone: str | pytz.timezone | None = None, @@ -543,7 +543,7 @@ def from_folder( # noqa: PLR0913 ---------- folder: Path The folder containing the spectro files. - strptime_format: str + strptime_format: str | list[str] | None The strptime format of the timestamps in the spectro file names. begin: Timestamp | None The begin of the spectro dataset. diff --git a/src/osekit/core/spectro_file.py b/src/osekit/core/spectro_file.py index 141de35f..041e6db8 100644 --- a/src/osekit/core/spectro_file.py +++ b/src/osekit/core/spectro_file.py @@ -51,7 +51,7 @@ def __init__( If it is not provided, ``strptime_format`` is mandatory. If both ``begin`` and ``strptime_format`` are provided, ``begin`` will overrule the timestamp embedded in the filename. - strptime_format: str | None + strptime_format: str | list[str] | None The strptime format used in the text. It should use valid strftime codes (https://strftime.org/). Example: ``'%y%m%d_%H:%M:%S'``. diff --git a/src/osekit/public/project.py b/src/osekit/public/project.py index 23288dc2..a9235ee4 100644 --- a/src/osekit/public/project.py +++ b/src/osekit/public/project.py @@ -53,7 +53,7 @@ class Project: def __init__( # noqa: PLR0913 self, folder: Path, - strptime_format: str | None, + strptime_format: str | list[str] | None, gps_coordinates: str | list | tuple = (0, 0), depth: float = 0.0, timezone: str | None = None, @@ -68,7 +68,7 @@ def __init__( # noqa: PLR0913 ---------- folder: Path Path to the folder containing the original audio files. - strptime_format: str | None + strptime_format: str | list[str] | None The strptime format used in the filenames. It should use valid strftime codes (https://strftime.org/). If ``None``, the first audio file of the folder will start diff --git a/tests/test_audio.py b/tests/test_audio.py index 3f9c6122..c15422b9 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -965,6 +965,23 @@ def test_normalize_audio_data( generate_sample_audio(1, 48_000), id="one_entire_file", ), + pytest.param( + { + "duration": 1, + "sample_rate": 48_000, + "nb_files": 1, + "date_begin": pd.Timestamp("2024-01-01 12:00:00"), + "series_type": "increase", + }, + None, + None, + "timedelta_total", + ["%y%H%M__%s", TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED, "%Y%H%m__%f"], + None, + None, + generate_sample_audio(1, 48_000), + id="first_valid_strptime_format_is_used", + ), pytest.param( { "duration": 1, @@ -1149,7 +1166,7 @@ def test_audio_dataset_from_folder( begin: pd.Timestamp | None, end: pd.Timestamp | None, mode: Literal["files", "timedelta_total", "timedelta_file"], - strptime_format: str, + strptime_format: str | list[str], first_file_begin: Timestamp | None, duration: pd.Timedelta | None, expected_audio_data: list[np.ndarray], diff --git a/tests/test_core_api_base.py b/tests/test_core_api_base.py index 8a3ba3c4..c437afc9 100644 --- a/tests/test_core_api_base.py +++ b/tests/test_core_api_base.py @@ -456,6 +456,28 @@ def test_base_dataset_from_files_overlap_errors(overlap: float, mode: str) -> No ], id="one_file_default", ), + pytest.param( + ["%Y%m%d%H%M%S%f", "%y%m%d%H%M%S", "%y%m%d%H%M%S%z"], + None, + None, + None, + "files", + 0.0, + None, + None, + None, + [Path(r"231201000000")], + [ + ( + Event( + begin=Timestamp("2023-12-01 00:00:00"), + end=Timestamp("2023-12-01 00:00:01"), + ), + [Path(r"231201000000")], + ), + ], + id="first_matching_strptime_format_is_used", + ), pytest.param( None, None, @@ -735,7 +757,7 @@ def test_base_dataset_from_files_overlap_errors(overlap: float, mode: str) -> No ) def test_base_dataset_from_folder( monkeypatch: pytest.monkeypatch, - strptime_format: str | None, + strptime_format: str | list[str] | None, begin: Timestamp | None, end: Timestamp | None, timezone: str | None, diff --git a/tests/test_files.py b/tests/test_files.py index d3bbe0eb..6f78810f 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -73,11 +73,18 @@ Timestamp("2004-05-03 19:12:12", tz="UTC+01:00"), id="aware_strptime_converted_to_provided_utc_offset", ), + pytest.param( + "040503121212_-0600.foo", + ["%y%m%d%H%M%S_%f", "%y%m%d%H%M%S_%z", "%y%m%d%H%M%S"], + "+0100", + Timestamp("2004-05-03 19:12:12", tz="UTC+01:00"), + id="first_matching_format_is_used", + ), ], ) def test_file_localization( file_name: str, - strptime_format: str, + strptime_format: str | list[str], timezone: str | pytz.timezone | None, expected_begin: Timestamp, ) -> None: @@ -163,7 +170,7 @@ def test_file_localization( def test_dataset_localization( tmp_path: Path, file_names: list[str], - strptime_format: str, + strptime_format: str | list[str], timezone: str | pytz.timezone | None, expected_begins: list[Timestamp], ) -> None: