diff --git a/Changelog.rst b/Changelog.rst index ba2327e61e..74b2b043c7 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,10 @@ Version NEXTVERSION **2026-??-??** +* New keyword parameter to `cf.Data.compute`: ``persist`` + (https://github.com/NCAS-CMS/cf-python/issues/929) +* New function to control the persistence of computed data: + `cf.persist_data` (https://github.com/NCAS-CMS/cf-python/issues/929) * New default backend for netCDF-4 in `cf.write`: ``h5netcdf-h5py``, that allows control of the internal file metadata via the new ``h5py_options`` parameter diff --git a/cf/functions.py b/cf/functions.py index c527a649d6..ac1c7ac5db 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -157,6 +157,7 @@ def configuration( chunksize=None, log_level=None, display_data=None, + persist_data=None, regrid_logging=None, relaxed_identities=None, bounds_combination_mode=None, @@ -179,6 +180,7 @@ def configuration( * `chunksize` * `log_level` * `display_data` + * `persist_data` * `regrid_logging` * `relaxed_identities` * `bounds_combination_mode` @@ -203,9 +205,10 @@ def configuration( .. seealso:: `atol`, `rtol`, `tempdir`, `chunksize`, `total_memory`, `log_level`, `display_data`, - `regrid_logging`, `relaxed_identities`, - `bounds_combination_mode`, `active_storage`, - `active_storage_url`, `active_storage_max_requests` + `persist_data`, `regrid_logging`, + `relaxed_identities`, `bounds_combination_mode`, + `active_storage`, `active_storage_url`, + `active_storage_max_requests` :Parameters: @@ -247,12 +250,18 @@ def configuration( * ``'DETAIL'`` (``3``); * ``'DEBUG'`` (``-1``). - display_data `bool` or `Constant`, optional + display_data: `bool` or `Constant`, optional The new display data option. The default is to not change the current behaviour. .. versionadded:: 3.19.0 + persist_data: `bool` or `Constant`, optional + The new persist data option. The default is to not change + the current behaviour. + + .. versionadded:: NEXTVERSION + regrid_logging: `bool` or `Constant`, optional The new value (either True to enable logging or False to disable it). The default is to not change the current @@ -312,6 +321,7 @@ def configuration( 'bounds_combination_mode': 'AND', 'chunksize': 82873466.88000001, 'display_data': True, + 'persist_data': False, 'active_storage': False, 'active_storage_url': None, 'active_storage_max_requests': 100} @@ -330,6 +340,7 @@ def configuration( 'bounds_combination_mode': 'AND', 'chunksize': 75000000.0, 'display_data': True, + 'persist_data': False, 'active_storage': False, 'active_storage_url': None, 'active_storage_max_requests': 100} @@ -358,6 +369,7 @@ def configuration( 'bounds_combination_mode': 'AND', 'chunksize': 75000000.0, 'display_data': True, + 'persist_data': False, 'active_storage': False, 'active_storage_url': None} >>> with cf.configuration(atol=9, rtol=10): @@ -372,6 +384,7 @@ def configuration( 'bounds_combination_mode': 'AND', 'chunksize': 75000000.0, 'display_data': True, + 'persist_data': False, 'active_storage': False, 'active_storage_url': None, 'active_storage_max_requests': 100} @@ -385,6 +398,7 @@ def configuration( 'bounds_combination_mode': 'AND', 'chunksize': 75000000.0, 'display_data': True, + 'persist_data': False, 'active_storage': False, 'active_storage_url': None, 'active_storage_max_requests': 100} @@ -416,6 +430,7 @@ def configuration( new_chunksize=chunksize, new_log_level=log_level, new_display_data=display_data, + new_persist_data=persist_data, new_regrid_logging=regrid_logging, new_relaxed_identities=relaxed_identities, bounds_combination_mode=bounds_combination_mode, @@ -460,6 +475,7 @@ def _configuration(_Configuration, **kwargs): "new_chunksize": chunksize, "new_log_level": log_level, "new_display_data": display_data, + "new_persist_data": persist_data, "new_regrid_logging": regrid_logging, "new_relaxed_identities": relaxed_identities, "bounds_combination_mode": bounds_combination_mode, @@ -590,6 +606,10 @@ class display_data(ConstantAccess, cfdm.display_data): pass +class persist_data(ConstantAccess, cfdm.persist_data): + pass + + class regrid_logging(ConstantAccess): """Whether or not to enable `esmpy` regridding logging. diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 9fe41f0282..02951b4d65 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -17,6 +17,8 @@ faulthandler.enable() # to debug seg faults and timeouts +import cfdm + import cf n_tmpfiles = 2 @@ -3050,8 +3052,6 @@ def test_Data_where(self): def test_Data__init__compression(self): """Test Data initialised from compressed data sources.""" - import cfdm - # Ragged for f in cfdm.read("DSG_timeSeries_contiguous.nc"): f = f.data @@ -3200,6 +3200,59 @@ def test_Data_compute(self): d.compute() self.assertEqual(d.get_cached_elements(), {0: 1, 1: 2, -1: 2}) + # Persist + f = cf.read(self.filename, dask_chunks=3)[0] + d0 = f.data + npartitions = d0.npartitions + self.assertGreater(npartitions, 1) + + with cf.persist_data(False): + d = d0.copy() + a = d.compute() + self.assertEqual(len(d.get_filenames()), 1) + b = d.compute() + self.assertEqual(len(d.get_filenames()), 1) + self.assertTrue(np.allclose(a, b)) + + d = d0.copy() + a = d.compute() + self.assertEqual(len(d.get_filenames()), 1) + b = d.compute(persist=None) + self.assertEqual(len(d.get_filenames()), 1) + self.assertTrue(np.allclose(a, b)) + + d = d0.copy() + a = d.compute() + self.assertEqual(len(d.get_filenames()), 1) + b = d.compute(persist=False) + self.assertEqual(len(d.get_filenames()), 1) + self.assertTrue(np.allclose(a, b)) + + d = d0.copy() + a = d.compute() + self.assertEqual(len(d.get_filenames()), 1) + b = d.compute(persist=True) + self.assertEqual(len(d.get_filenames()), 0) + self.assertEqual(d.npartitions, npartitions) + self.assertTrue(np.allclose(a, b)) + + with cf.persist_data(True): + d = d0.copy() + d.compute() + self.assertEqual(len(d.get_filenames()), 0) + + d = d0.copy() + d.compute(persist=None) + self.assertEqual(len(d.get_filenames()), 0) + + d = d0.copy() + d.compute(persist=False) + self.assertEqual(len(d.get_filenames()), 1) + + d = d0.copy() + d.compute(persist=True) + self.assertEqual(len(d.get_filenames()), 0) + def test_Data_persist(self): """Test Data.persist.""" d = cf.Data(9, "km") @@ -4149,8 +4202,6 @@ def test_Data_masked_invalid(self): def test_Data_uncompress(self): """Test the `uncompress` Data method.""" - import cfdm - f = cfdm.read("DSG_timeSeries_contiguous.nc")[0] a = f.data.array d = cf.Data(cf.RaggedContiguousArray(source=f.data.source())) @@ -4274,8 +4325,6 @@ def test_Data_soften_mask(self): def test_Data_compressed_array(self): """Test the `compressed_array` Data property.""" - import cfdm - f = cfdm.read("DSG_timeSeries_contiguous.nc")[0] f = f.data d = cf.Data(cf.RaggedContiguousArray(source=f.source())) @@ -4305,8 +4354,6 @@ def test_Data_fits_in_memory(self): def test_Data_get_compressed(self): """Test the Data methods which get compression properties.""" - import cfdm - # Compressed f = cfdm.read("DSG_timeSeries_contiguous.nc")[0] f = f.data @@ -4365,8 +4412,6 @@ def test_Data_get_data(self): def test_Data_get_count(self): """Test the `get_count` Data method.""" - import cfdm - f = cfdm.read("DSG_timeSeries_contiguous.nc")[0] f = f.data d = cf.Data(cf.RaggedContiguousArray(source=f.source())) @@ -4378,8 +4423,6 @@ def test_Data_get_count(self): def test_Data_get_index(self): """Test the `get_index` Data method.""" - import cfdm - f = cfdm.read("DSG_timeSeries_indexed.nc")[0] f = f.data d = cf.Data(cf.RaggedIndexedArray(source=f.source())) @@ -4391,8 +4434,6 @@ def test_Data_get_index(self): def test_Data_get_list(self): """Test the `get_list` Data method.""" - import cfdm - f = cfdm.read("gathered.nc")[0] f = f.data d = cf.Data(cf.GatheredArray(source=f.source())) diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index 4ca7750e5f..544d6f8b5b 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -55,7 +55,7 @@ def test_configuration(self): self.assertIsInstance(org, dict) # Check all keys that should be there are, with correct value type: - self.assertEqual(len(org), 12) # update expected len if add new key(s) + self.assertEqual(len(org), 13) # update expected len if add new key(s) # Types expected: self.assertIsInstance(org["atol"], float) @@ -71,6 +71,7 @@ def test_configuration(self): # equiv. string self.assertIsInstance(org["log_level"], str) self.assertIsInstance(org["display_data"], bool) + self.assertIsInstance(org["persist_data"], bool) # Store some sensible values to reset items to for testing, ensuring: # 1) they are kept different to the defaults (i.e. org values); and diff --git a/docs/source/function.rst b/docs/source/function.rst index 31de3bce8e..21a710d981 100644 --- a/docs/source/function.rst +++ b/docs/source/function.rst @@ -135,6 +135,8 @@ Resource management cf.configuration cf.chunksize + cf.display_data + cf.persist_data cf.free_memory cf.regrid_logging cf.tempdir