diff --git a/climanet/st_encoder_decoder.py b/climanet/st_encoder_decoder.py index 7df74ad..7603d93 100644 --- a/climanet/st_encoder_decoder.py +++ b/climanet/st_encoder_decoder.py @@ -549,6 +549,22 @@ def __init__( num_months=num_months, ) self.patch_size = patch_size + + # Store config for easy model replication + self.config = { + 'in_chans': in_chans, + 'embed_dim': embed_dim, + 'patch_size': patch_size, + 'max_days': max_days, + 'max_months': max_months, + 'num_months': num_months, + 'hidden': hidden, + 'overlap': overlap, + 'max_H': max_H, + 'max_W': max_W, + 'spatial_depth': spatial_depth, + 'spatial_heads': spatial_heads, + } def forward(self, daily_data, daily_mask, land_mask_patch, padded_days_mask=None): """Forward pass of the Spatio-Temporal model. diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb index c6d2e0a..733fdc5 100644 --- a/notebooks/example.ipynb +++ b/notebooks/example.ipynb @@ -8,6 +8,7 @@ "outputs": [], "source": [ "from pathlib import Path\n", + "import dask\n", "import xarray as xr\n", "import torch\n", "import torch.nn.functional\n", @@ -27,22 +28,1746 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "13a3b0c8-1d92-460d-84a4-a3a59ca081af", + "execution_count": null, + "id": "5b1b1129", "metadata": {}, "outputs": [], "source": [ + "# # Normal reading: load full dataset in memory\n", + "\n", + "# # Data folder\n", + "# data_folder = Path(\"../../data/output/\")\n", + "\n", + "# # Training patch size\n", + "# patch_size_training = 80\n", + "\n", + "# # Get all files\n", + "# daily_files = sorted(data_folder.rglob(\"20*_day_ERA5_masked_ts.nc\"))\n", + "# monthly_files = sorted(data_folder.rglob(\"20*_mon_ERA5_full_ts.nc\"))\n", + "\n", + "# daily_data = xr.open_mfdataset(daily_files)\n", + "# monthly_data = xr.open_mfdataset(monthly_files)\n", + " \n", + "# lsm_mask = xr.open_dataset(data_folder / \"era5_lsm_bool.nc\" ) # downloaded from ERA5 and regridded\n", + "# lsm_mask = lsm_mask.rename({\"latitude\": \"lat\", \"longitude\": \"lon\"})[[\"lsm\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13a3b0c8-1d92-460d-84a4-a3a59ca081af", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:20: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " daily_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lat\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n", + "/tmp/ipykernel_31814/1980159491.py:31: UserWarning: The specified chunks separate the stored chunks along dimension \"lon\" starting at index 40. This could degrade performance. Instead, consider rechunking after loading.\n", + " monthly_data = xr.open_mfdataset(\n" + ] + } + ], + "source": [ + "# For debug, load part of the two year dataset\n", + "\n", + "# Data folder\n", "data_folder = Path(\"../../data/output/\")\n", "\n", - "file_names = [data_folder / \"202001_day_ERA5_masked_ts.nc\", data_folder / \"202002_day_ERA5_masked_ts.nc\"]\n", - "daily_data = xr.open_mfdataset(file_names)\n", + "# (Only for local debug) Subset the data while loading\n", + "# Define ROI once so subsetting happens during file open (not after)\n", + "lon_subset = slice(-10, 10)\n", + "lat_subset = slice(-5, 5)\n", + "\n", + "# Training patch size\n", + "patch_size_training = 20\n", "\n", - "file_names = [data_folder / \"202001_mon_ERA5_full_ts.nc\", data_folder / \"202002_mon_ERA5_full_ts.nc\"]\n", - "monthly_data = xr.open_mfdataset(file_names)\n", + "# Keep only required variable + spatial subset while reading each file\n", + "def _preprocess_roi(ds):\n", + " return ds[[\"ts\"]].sel(lon=lon_subset, lat=lat_subset)\n", "\n", - "file_name = data_folder / \"era5_lsm_bool.nc\" # downloded from era5 and regridded using the function `regrid_to_boundary_centered_grid`\n", + "daily_files = sorted(data_folder.rglob(\"20*_day_ERA5_masked_ts.nc\"))\n", + "monthly_files = sorted(data_folder.rglob(\"20*_mon_ERA5_full_ts.nc\"))\n", + "\n", + "# Use smaller spatial chunks to reduce peak memory per task\n", + "daily_data = xr.open_mfdataset(\n", + " daily_files,\n", + " combine=\"by_coords\",\n", + " preprocess=_preprocess_roi,\n", + " chunks={\"time\": 1, \"lat\": patch_size_training*2, \"lon\": patch_size_training*2},\n", + " data_vars=\"minimal\",\n", + " coords=\"minimal\",\n", + " compat=\"override\",\n", + " parallel=False,\n", + ")\n", + "\n", + "monthly_data = xr.open_mfdataset(\n", + " monthly_files,\n", + " combine=\"by_coords\",\n", + " preprocess=_preprocess_roi,\n", + " chunks={\"time\": 1, \"lat\": patch_size_training*2, \"lon\": patch_size_training*2},\n", + " data_vars=\"minimal\",\n", + " coords=\"minimal\",\n", + " compat=\"override\",\n", + " parallel=False,\n", + ")\n", + "\n", + "file_name = data_folder / \"era5_lsm_bool.nc\" # downloaded from ERA5 and regridded\n", "lsm_mask = xr.open_dataset(file_name)\n", - "lsm_mask = lsm_mask.rename({'latitude': 'lat', 'longitude': 'lon'})" + "lsm_mask = lsm_mask.rename({\"latitude\": \"lat\", \"longitude\": \"lon\"})[[\"lsm\"]].sel(lon=lon_subset, lat=lat_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7d13e24a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 9MB\n", + "Dimensions: (time: 731, lat: 40, lon: 80)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 6kB 2020-01-01T11:30:00 ... 2021-12-31T11:...\n", + " * lat (lat) float32 160B -4.875 -4.625 -4.375 ... 4.375 4.625 4.875\n", + " * lon (lon) float32 320B -9.875 -9.625 -9.375 ... 9.375 9.625 9.875\n", + "Data variables:\n", + " ts (time, lat, lon) float32 9MB dask.array<chunksize=(1, 20, 40), meta=np.ndarray>\n", + "Attributes:\n", + " CDI: Climate Data Interface version 2.2.4 (https://mpimet.mpg.de...\n", + " Conventions: CF-1.6\n", + " history: Tue Feb 03 08:53:20 2026: cdo daymean /work/bd0854/b380103/...\n", + " frequency: day\n", + " CDO: Climate Data Operators version 2.2.2 (https://mpimet.mpg.de...
<xarray.Dataset> Size: 308kB\n", + "Dimensions: (time: 24, lat: 40, lon: 80)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 192B 2020-01-16T11:30:00 ... 2021-12-16T11...\n", + " * lat (lat) float32 160B -4.875 -4.625 -4.375 ... 4.375 4.625 4.875\n", + " * lon (lon) float32 320B -9.875 -9.625 -9.375 ... 9.375 9.625 9.875\n", + "Data variables:\n", + " ts (time, lat, lon) float32 307kB dask.array<chunksize=(1, 20, 40), meta=np.ndarray>\n", + "Attributes:\n", + " CDI: Climate Data Interface version 2.2.4 (https://mpimet.mpg.de...\n", + " Conventions: CF-1.6\n", + " history: Tue Feb 03 08:53:10 2026: cdo monmean /work/bd0854/b380103/...\n", + " frequency: mon\n", + " CDO: Climate Data Operators version 2.2.2 (https://mpimet.mpg.de...