From 2c7a7cbd5f8ecdd6ab94fccdbdbd9a2efcd43398 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Tue, 14 Apr 2026 16:11:01 +0100 Subject: [PATCH 1/3] Handle DM in mass table --- misc/convert_eagle.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/misc/convert_eagle.py b/misc/convert_eagle.py index 3f818361..a52010e3 100644 --- a/misc/convert_eagle.py +++ b/misc/convert_eagle.py @@ -569,14 +569,13 @@ ] = conversion_factor # DM mass can be a special case + properties['dm_mass_in_table'] = False if "Mass" in properties.get(f"PartType1", {}): if "Mass" not in infile["PartType1"]: # Load DM mass from mass table dm_mass = infile["Header"].attrs["MassTable"][1] / h properties["PartType1"]["Mass"]["conversion_factor"] = dm_mass - else: - # Treat DM mass as any other property - dm_mass = 0 + properties['dm_mass_in_table'] = True # Get list of elements for ElementMassFractions if "ElementMassFractions" in properties.get(f"PartType0", {}): @@ -674,7 +673,7 @@ if comm_rank == 0: print(f"Converting PartType{ptype}/{prop}") - if (ptype == 1) and (prop == "Mass") and (dm_mass == 0): + if (ptype == 1) and (prop == "Mass") and properties['dm_mass_in_table']: # DM particles all have the same mass, so are not saved in the snapshots arr = np.ones(pos.shape[0]) else: From d4ce2fd884e364443068ad887ca3ae3dc7027f66 Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Mon, 15 Jun 2026 11:02:26 +0100 Subject: [PATCH 2/3] Various fixes --- SOAP/catalogue_readers/read_subfind_eagle.py | 3 +- SOAP/compression/make_virtual_snapshot.py | 2 + misc/convert_eagle.py | 124 ++++++++++++------- parameter_files/EAGLE.yml | 2 +- scripts/EAGLE.sh | 68 ++++++++-- 5 files changed, 138 insertions(+), 61 deletions(-) diff --git a/SOAP/catalogue_readers/read_subfind_eagle.py b/SOAP/catalogue_readers/read_subfind_eagle.py index 31ff7fb0..4bc86ec0 100644 --- a/SOAP/catalogue_readers/read_subfind_eagle.py +++ b/SOAP/catalogue_readers/read_subfind_eagle.py @@ -117,7 +117,8 @@ def read_subfind_catalogue(comm, basename, a_unit, registry, boxsize): ) # Store initial search radius - search_radius = (5 * data["Subhalo/VmaxRadius"] / h) * swift_cmpc + search_radius_cmpc = np.minimum((5 * data["Subhalo/VmaxRadius"] / h), 5) + search_radius = search_radius_cmpc * swift_cmpc local_halo = { "cofp": cofp, diff --git a/SOAP/compression/make_virtual_snapshot.py b/SOAP/compression/make_virtual_snapshot.py index 2311840e..ea30ea1c 100644 --- a/SOAP/compression/make_virtual_snapshot.py +++ b/SOAP/compression/make_virtual_snapshot.py @@ -360,3 +360,5 @@ def replace_path(old_path): absolute_paths=args.absolute_paths, discard_duplicate_datasets=args.discard_duplicate_datasets, ) + + print(f"Done!") diff --git a/misc/convert_eagle.py b/misc/convert_eagle.py index a52010e3..01ec97e0 100644 --- a/misc/convert_eagle.py +++ b/misc/convert_eagle.py @@ -8,11 +8,15 @@ mpirun -- python convert_eagle.py \ --snapshot-basename=SNAPSHOT \ + --particledata-basename=PARTICLE_DATA \ + --subfind-basename=SUBFIND \ --output-basename=OUTPUT \ --membership-basename=MEMBERSHIP -where SNAPSHOT is the EAGLE snapshot (use the particledata_*** files, -since the normal snapshots don't store SubGroupNumber), and OUTPUT & +where SNAPSHOT is the EAGLE snapshot (the snapshots_*** files), +PARTICLE_DATA are the EAGLE membership files (the particledata_*** files, +since the normal snapshots don't store SubGroupNumber), SUBFIND are +the SubFind catalogues (the subfind_tab_*** files), and OUTPUT & MEMBERSHIP are the names of the output files. You must run with the same number of ranks as input files. @@ -82,11 +86,21 @@ "name without the .{file_nr}.hdf5 suffix)" ), ) +parser.add_argument( + "--particledata-basename", + type=str, + required=True, + help=( + "The basename for the particle data files (the files which " + "contain the GroupNumber and SubGroupNumber values for bound " + "particles. For EAGLE these are separate to the snapshots." + ), +) parser.add_argument( "--subfind-basename", type=str, required=True, - help=("The basename for the subfind files"), + help="The basename for the subfind files", ) parser.add_argument( "--output-basename", @@ -102,6 +116,7 @@ ) args = parser.parse_args() snap_filename = args.snap_basename + ".{file_nr}.hdf5" +particledata_filename = args.particledata_basename + ".{file_nr}.hdf5" subfind_filename = args.subfind_basename + ".{file_nr}.hdf5" output_filename = args.output_basename + ".{file_nr}.hdf5" membership_filename = args.membership_basename + ".{file_nr}.hdf5" @@ -132,7 +147,7 @@ h = comm.bcast(h) box_size_cmpc = comm.bcast(box_size_cmpc) -assert comm_size == n_file +assert comm_size <= n_file # Specify the unit system of the output SWIFT snapshot if comm_rank == 0: @@ -281,13 +296,6 @@ "description": None, "conversion_factor": None, }, - "GroupNumber": { - "swift_name": "FOFGroupIDs", - "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, - "a_exponent": None, - "description": None, - "conversion_factor": None, - }, "ParticleIDs": { "swift_name": "ParticleIDs", "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, @@ -380,13 +388,6 @@ "description": "Particle mass", "conversion_factor": None, }, - "GroupNumber": { - "swift_name": "FOFGroupIDs", - "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, - "a_exponent": None, - "description": None, - "conversion_factor": None, - }, "ParticleIDs": { "swift_name": "ParticleIDs", "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, @@ -417,13 +418,6 @@ "description": None, "conversion_factor": None, }, - "GroupNumber": { - "swift_name": "FOFGroupIDs", - "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, - "a_exponent": None, - "description": None, - "conversion_factor": None, - }, "ParticleIDs": { "swift_name": "ParticleIDs", "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, @@ -506,13 +500,6 @@ "description": None, "conversion_factor": None, }, - "GroupNumber": { - "swift_name": "FOFGroupIDs", - "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, - "a_exponent": None, - "description": None, - "conversion_factor": None, - }, "ParticleIDs": { "swift_name": "ParticleIDs", "exponents": {"L": 0, "M": 0, "T": 0, "t": 0}, @@ -607,6 +594,9 @@ snap_file = phdf5.MultiFile( snap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm ) +particledata_file = phdf5.MultiFile( + particledata_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm +) # Load the SubFind catalogue and create an array that links the GroupNumber # and SubGroupNumber of a subhalo to its index within the subhalo catalogue # (for creating the membership files) @@ -656,6 +646,15 @@ elements_per_file[1:] -= elements_per_file[:-1] assert np.sum(elements_per_file) == np.sum(cell_counts[ptype]) + # Each rank writes the files assigned to it by MultiFile, so it + # must hold the concatenation of the data of those files. + elements_per_rank = np.zeros(comm_size, dtype=elements_per_file.dtype) + for rank in range(comm_size): + first = snap_file.first_file_on_rank[rank] + num = snap_file.num_files_on_rank[rank] + elements_per_rank[rank] = np.sum(elements_per_file[first : first + num]) + assert np.sum(elements_per_rank) == np.sum(elements_per_file) + # Calculate offsets of the first particle in each cell cell_files[ptype] = np.repeat(np.arange(n_file), cells_per_file) absolute_offset = np.cumsum(cell_counts[ptype]) - cell_counts[ptype] @@ -703,7 +702,7 @@ attrs.update(unit_attrs) # Write to the output file - arr = psort.repartition(arr, elements_per_file, comm=comm) + arr = psort.repartition(arr, elements_per_rank, comm=comm) if create_output_file: mode = "w" create_output_file = False @@ -750,7 +749,7 @@ attrs.update(unit_attrs) # Write to the output file - arr = psort.repartition(arr, elements_per_file, comm=comm) + arr = psort.repartition(arr, elements_per_rank, comm=comm) if create_output_file: mode = "w" create_output_file = False @@ -765,44 +764,77 @@ attrs={"ElementMassFractions": attrs}, ) + + # Load the GroupNumber and SubGroupNumber of particles by matching + # the snapshot_* files with the particledata_* files + snap_ids = snap_file.read(f"PartType{ptype}/ParticleIDs") + particledata_ids = particledata_file.read(f"PartType{ptype}/ParticleIDs") + idx = psort.parallel_match(snap_ids, particledata_ids, comm=comm) + + # EAGLE uses a value of 2^30 to indicate unbound particles + # Particles missing from the particledata_* files are always unbound + particledata_sub_group_nr = particledata_file.read(f"PartType{ptype}/SubGroupNumber") + sub_group_nr = 1073741824 * np.ones(snap_ids.shape[0], dtype=np.int32) + sub_group_nr[idx != -1] = psort.fetch_elements( + particledata_sub_group_nr, + idx[idx != -1], + comm=comm, + ) + + # Negative values indicate that the particle is not part of a FoF, + # but is within the SO group of a FoF (the FoF it is part of is the positive + # value, e.g. -10 means it is within the SO of FoF 10) + particledata_group_nr = particledata_file.read(f"PartType{ptype}/GroupNumber") + particledata_group_nr[particledata_group_nr < 0] = 1073741824 + group_nr = 1073741824 * np.ones(snap_ids.shape[0], dtype=np.int32) + group_nr[idx != -1] = psort.fetch_elements( + particledata_group_nr, + idx[idx != -1], + comm=comm, + ) + # Create a subhalo id for each particle by combining the # group number and subgroup number - sub_group = snap_file.read(f"PartType{ptype}/SubGroupNumber") - subhalo = snap_file.read(f"PartType{ptype}/GroupNumber").astype(np.int64) + subhalo = group_nr.astype(np.int64) subhalo <<= 32 - subhalo += sub_group.astype(np.int64) - # Indicate unbound particles with -1 - bound = sub_group != 1073741824 + subhalo += sub_group_nr.astype(np.int64) + # For SOAP we want unbound particles to be indicated with a value of -1 + bound = sub_group_nr != 1073741824 subhalo[np.logical_not(bound)] = -1 # Get SubFind index of bound particles subhalo[bound] = psort.parallel_match(subhalo[bound], subfind_id, comm=comm) assert np.all(subhalo[bound] != -1) - # Sort, add units, and write to file (same as for other properties) + # Sort values spatially (same as for other properties) subhalo = psort.fetch_elements(subhalo, order, comm=comm) + group_nr = psort.fetch_elements(group_nr, order, comm=comm) + # Add units, and write to file units = unyt.Unit("dimensionless", registry=reg) unit_attrs = swift_units.attributes_from_units(units, False, 0) - attrs = { + subhalo_attrs = { "Description": ( "Unique identifier of the subhalo this particle is " "bound to. This is a combination of the GroupNumber and" "the SubGroupNumber. -1 if the particle is not bound" ) } - attrs.update(unit_attrs) - subhalo = psort.repartition(subhalo, elements_per_file, comm=comm) + subhalo_attrs.update(unit_attrs) + fof_attrs = {"Description": "FoF group number particle is in"} + fof_attrs.update(unit_attrs) + subhalo = psort.repartition(subhalo, elements_per_rank, comm=comm) + group_nr = psort.repartition(group_nr, elements_per_rank, comm=comm) if create_membership_file: mode = "w" create_membership_file = False else: mode = "r+" snap_file.write( - {"GroupNr_bound": subhalo}, + {"GroupNr_bound": subhalo, "FOFGroupIDs": group_nr}, elements_per_file, filenames=membership_filename, mode=mode, group=f"PartType{ptype}", - attrs={"GroupNr_bound": attrs}, + attrs={"GroupNr_bound": subhalo_attrs, "FOFGroupIDs": fof_attrs}, ) # Add headers to the snapshots @@ -813,7 +845,7 @@ header = outfile.create_group("Header") for name, value in swift_header.items(): header.attrs[name] = value - n_part = np.zeros(max(ptypes) + 1) + n_part = np.zeros(max(ptypes) + 1, dtype=np.int64) for ptype in ptypes: n_part[ptype] = outfile[f"PartType{ptype}/Coordinates"].shape[0] header.attrs["NumPart_ThisFile"] = n_part diff --git a/parameter_files/EAGLE.yml b/parameter_files/EAGLE.yml index 5a1d05d8..2b825167 100644 --- a/parameter_files/EAGLE.yml +++ b/parameter_files/EAGLE.yml @@ -9,7 +9,7 @@ Parameters: # Location of the Swift snapshots: Snapshots: # Use {snap_nr:04d} for the snapshot number and {file_nr} for the file number. - filename: "{sim_dir}/{sim_name}/swift_snapshots/swift_{snap_nr:03d}/snap_{snap_nr:03d}.{file_nr}.hdf5" + filename: "{sim_dir}/{sim_name}/snapshots/snap_{snap_nr:03d}/snap_{snap_nr:03d}.{file_nr}.hdf5" # Which halo finder we're using, and base name for halo finder output files HaloFinder: diff --git a/scripts/EAGLE.sh b/scripts/EAGLE.sh index 72bc0511..3ae44945 100755 --- a/scripts/EAGLE.sh +++ b/scripts/EAGLE.sh @@ -9,7 +9,9 @@ #SBATCH --exclusive #SBATCH -t 02:00:00 # -# For L0025N0752 set ntasks=16 +# For L0025N0752/REFERENCE set ntasks=16 +# For L0025N0752/RECALIBRATED set ntasks=32 +# For L0025N0752/WDM set ntasks=32 # For L0100N1504 set ntasks=256 # # Install Hdecompose with: @@ -20,7 +22,18 @@ # # Submit this script from the main SOAP directory ($ sbatch scripts/EAGLE.sh) -sim_name='L0100N1504' +# sim_name='L0025N0752/PE/REFERENCE' +# sim_name='L0025N0752/PE/RECALIBRATED' +sim_name='L0100N1504/PE/REFERENCE' +# sim_name='L0025N0752/EAGLE_WDM' + +sim_dir="/cosma7/data/Eagle/ScienceRuns/Planck1/${sim_name}/data" +# sim_dir="/snap7/scratch/dp004/dc-mcgi1/EAGLE_WDM/data" + +# Note that if you update the output directory, you will +# also need to update the SOAP parameter file +output_dir="/snap7/scratch/dp004/dc-mcgi1/SOAP_EAGLE/${sim_name}" + snap_nr="028" z_suffix="z000p000" @@ -30,13 +43,19 @@ source openmpi-5.0.3-hdf5-1.12.3-env/bin/activate ######## Link files to snap (to remove awful z suffix) -sim_dir="/cosma7/data/Eagle/ScienceRuns/Planck1/${sim_name}/PE/REFERENCE/data" -# Note that if you update the output directory, you will also need to -# update the SOAP parameter file -output_dir="/snap7/scratch/dp004/dc-mcgi1/SOAP_EAGLE/${sim_name}" +sim_snap_dir="${sim_dir}/snapshot_${snap_nr}_${z_suffix}" +output_snap_dir="${output_dir}/gadget_snapshots/snapshot_${snap_nr}" +mkdir -p $output_snap_dir +i=0 +while [[ -e "${sim_snap_dir}/snap_${snap_nr}_${z_suffix}.${i}.hdf5" ]]; do + old_name="${sim_snap_dir}/snap_${snap_nr}_${z_suffix}.${i}.hdf5" + new_name="${output_snap_dir}/snap_${snap_nr}.${i}.hdf5" + ln -s $old_name $new_name + ((i++)) +done sim_snap_dir="${sim_dir}/particledata_${snap_nr}_${z_suffix}" -output_snap_dir="${output_dir}/gadget_snapshots/snapshot_${snap_nr}" +output_snap_dir="${output_dir}/gadget_membership/snapshot_${snap_nr}" mkdir -p $output_snap_dir i=0 while [[ -e "${sim_snap_dir}/eagle_subfind_particles_${snap_nr}_${z_suffix}.${i}.hdf5" ]]; do @@ -63,28 +82,41 @@ set -e mpirun -- python -u misc/convert_eagle.py \ --snap-basename "${output_dir}/gadget_snapshots/snapshot_${snap_nr}/snap_${snap_nr}" \ + --particledata-basename "${output_dir}/gadget_membership/snapshot_${snap_nr}/snap_${snap_nr}" \ --subfind-basename "${output_group_dir}/subfind_tab_${snap_nr}" \ - --output-basename "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}" \ + --output-basename "${output_dir}/snapshots/snap_${snap_nr}/snap_${snap_nr}" \ --membership-basename "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}" ######### Estimate SpeciesFraction of hydrogen mpirun -- python -u misc/hdecompose_hydrogen_fractions.py \ - --snap-basename "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}" \ + --snap-basename "${output_dir}/snapshots/snap_${snap_nr}/snap_${snap_nr}" \ --output-basename "${output_dir}/species_fractions/swift_${snap_nr}/snap_${snap_nr}" ######### Create virtual snapshot # Must be run from the snapshot directory itself or there will be issues with paths soap_dir=$(pwd) -cd "${output_dir}/swift_snapshots/swift_${snap_nr}" +cd "${output_dir}/snapshots/snap_${snap_nr}" python "${soap_dir}/create_virtual_snapshot.py" "snap_${snap_nr}.0.hdf5" cd - +######### Repack memebership files and create virtual snapshot + +input_filename="${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}" +output_filename="${output_dir}/SOAP/membership_${snap_nr}/membership_${snap_nr}" +mkdir -p "${output_dir}/SOAP/membership_${snap_nr}" + +nr_files=`ls -1 ${input_filename}.*.hdf5 | wc -l` +nr_files_minus_one=$(( ${nr_files} - 1 )) + +seq 0 ${nr_files_minus_one} | xargs -I {} -P 16 bash -c \ + "h5repack -i ${input_filename}.{}.hdf5 -o ${output_filename}.{}.hdf5 -l CHUNK=10000 -f GZIP=4" + python SOAP/compression/make_virtual_snapshot.py \ - --virtual-snapshot "${output_dir}/swift_snapshots/swift_${snap_nr}/snap_${snap_nr}.hdf5" \ - --auxiliary-snapshots "${output_dir}/SOAP_uncompressed/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" \ - --output-file "${output_dir}/SOAP_uncompressed/snap_${snap_nr}.hdf5" + --virtual-snapshot "${output_dir}/snapshots/snap_${snap_nr}/snap_${snap_nr}.hdf5" \ + --auxiliary-snapshots "${output_dir}/SOAP/membership_${snap_nr}/membership_${snap_nr}.{file_nr}.hdf5" "${output_dir}/species_fractions/swift_${snap_nr}/snap_${snap_nr}.{file_nr}.hdf5" \ + --output-file "${output_dir}/SOAP/snap_with_SOAP_membership_${snap_nr}.hdf5" ######### Run SOAP @@ -94,6 +126,16 @@ mpirun -- python3 -u -m mpi4py SOAP/compute_halo_properties.py \ parameter_files/EAGLE.yml \ --sim-name=${sim_name} --snap-nr=${snap_nr} --chunks=${chunks} +######### Compress SOAP (run on single node) + +module purge +module load python/3.12.4 + +mpirun -np 28 python -u SOAP/compression/compress_soap_catalogue.py \ + "${output_dir}/SOAP_uncompressed/halo_properties_${snap_nr}.hdf5" \ + "${output_dir}/SOAP/halo_properties_${snap_nr}.hdf5" \ + "${output_dir}/SOAP_compression_tmp" + ############## echo "Job complete!" From 63c7c0099b85d6f750f87aa5f83ea2ba9334129b Mon Sep 17 00:00:00 2001 From: robjmcgibbon Date: Mon, 15 Jun 2026 11:06:05 +0100 Subject: [PATCH 3/3] Format --- misc/convert_eagle.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/misc/convert_eagle.py b/misc/convert_eagle.py index 01ec97e0..cd468169 100644 --- a/misc/convert_eagle.py +++ b/misc/convert_eagle.py @@ -556,13 +556,13 @@ ] = conversion_factor # DM mass can be a special case - properties['dm_mass_in_table'] = False + properties["dm_mass_in_table"] = False if "Mass" in properties.get(f"PartType1", {}): if "Mass" not in infile["PartType1"]: # Load DM mass from mass table dm_mass = infile["Header"].attrs["MassTable"][1] / h properties["PartType1"]["Mass"]["conversion_factor"] = dm_mass - properties['dm_mass_in_table'] = True + properties["dm_mass_in_table"] = True # Get list of elements for ElementMassFractions if "ElementMassFractions" in properties.get(f"PartType0", {}): @@ -646,7 +646,7 @@ elements_per_file[1:] -= elements_per_file[:-1] assert np.sum(elements_per_file) == np.sum(cell_counts[ptype]) - # Each rank writes the files assigned to it by MultiFile, so it + # Each rank writes the files assigned to it by MultiFile, so it # must hold the concatenation of the data of those files. elements_per_rank = np.zeros(comm_size, dtype=elements_per_file.dtype) for rank in range(comm_size): @@ -672,7 +672,7 @@ if comm_rank == 0: print(f"Converting PartType{ptype}/{prop}") - if (ptype == 1) and (prop == "Mass") and properties['dm_mass_in_table']: + if (ptype == 1) and (prop == "Mass") and properties["dm_mass_in_table"]: # DM particles all have the same mass, so are not saved in the snapshots arr = np.ones(pos.shape[0]) else: @@ -764,7 +764,6 @@ attrs={"ElementMassFractions": attrs}, ) - # Load the GroupNumber and SubGroupNumber of particles by matching # the snapshot_* files with the particledata_* files snap_ids = snap_file.read(f"PartType{ptype}/ParticleIDs") @@ -773,7 +772,9 @@ # EAGLE uses a value of 2^30 to indicate unbound particles # Particles missing from the particledata_* files are always unbound - particledata_sub_group_nr = particledata_file.read(f"PartType{ptype}/SubGroupNumber") + particledata_sub_group_nr = particledata_file.read( + f"PartType{ptype}/SubGroupNumber" + ) sub_group_nr = 1073741824 * np.ones(snap_ids.shape[0], dtype=np.int32) sub_group_nr[idx != -1] = psort.fetch_elements( particledata_sub_group_nr,