88from django .db .models .query import QuerySet
99from pulp_python .app .models import PythonPackageContent , PythonRepository
1010from pulp_python .app .utils import (
11+ artifact_to_metadata_artifact ,
1112 artifact_to_python_content_data ,
1213 fetch_json_release_metadata ,
1314 parse_metadata ,
1415)
15- from pulpcore .plugin .models import ContentArtifact , ProgressReport
16+ from pulpcore .plugin .models import Artifact , ContentArtifact , ProgressReport
1617from pulpcore .plugin .util import get_domain
1718
1819log = logging .getLogger (__name__ )
@@ -41,16 +42,25 @@ def repair(repository_pk: UUID) -> None:
4142 content_set = repository .latest_version ().content .values_list ("pk" , flat = True )
4243 content = PythonPackageContent .objects .filter (pk__in = content_set )
4344
44- num_repaired , pkgs_not_repaired = repair_metadata (content )
45+ num_repaired , pkgs_not_repaired , num_metadata_repaired , pkgs_metadata_not_repaired = (
46+ repair_metadata (content )
47+ )
48+ # Convert set() to 0
49+ if not pkgs_not_repaired :
50+ pkgs_not_repaired = 0
51+ if not pkgs_metadata_not_repaired :
52+ pkgs_metadata_not_repaired = 0
53+
4554 log .info (
4655 _ (
4756 "{} packages' metadata repaired. Not repaired packages due to either "
48- "inaccessible URL or mismatched sha256: {}."
49- ).format (num_repaired , pkgs_not_repaired )
57+ "inaccessible URL or mismatched sha256: {}. "
58+ "{} metadata files repaired. Packages whose metadata files could not be repaired: {}."
59+ ).format (num_repaired , pkgs_not_repaired , num_metadata_repaired , pkgs_metadata_not_repaired )
5060 )
5161
5262
53- def repair_metadata (content : QuerySet [PythonPackageContent ]) -> tuple [int , set [str ]]:
63+ def repair_metadata (content : QuerySet [PythonPackageContent ]) -> tuple [int , set [str ], int , set [ str ] ]:
5464 """
5565 Repairs metadata for a queryset of PythonPackageContent objects
5666 and updates the progress report.
@@ -59,9 +69,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
5969 content (QuerySet[PythonPackageContent]): The queryset of items to repair.
6070
6171 Returns:
62- tuple[int, set[str]]: A tuple containing:
72+ tuple[int, set[str], int, set[str] ]: A tuple containing:
6373 - The number of packages that were repaired.
6474 - A set of packages' PKs that were not repaired.
75+ - The number of metadata files that were repaired.
76+ - A set of packages' PKs without repaired metadata artifacts.
6577 """
6678 immediate_content = (
6779 content .filter (contentartifact__artifact__isnull = False )
@@ -87,6 +99,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
8799 # Keep track of on-demand packages that were not repaired
88100 pkgs_not_repaired = set ()
89101
102+ # Metadata artifacts and content artifacts
103+ metadata_batch = []
104+ total_metadata_repaired = 0
105+ pkgs_metadata_not_repaired = set ()
106+
90107 progress_report = ProgressReport (
91108 message = "Repairing packages' metadata" ,
92109 code = "repair.metadata" ,
@@ -102,6 +119,13 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
102119 .artifact
103120 )
104121 new_data = artifact_to_python_content_data (package .filename , main_artifact , domain )
122+ total_metadata_repaired += update_metadata_artifact_if_needed (
123+ package ,
124+ new_data .get ("metadata_sha256" ),
125+ main_artifact ,
126+ metadata_batch ,
127+ pkgs_metadata_not_repaired ,
128+ )
105129 total_repaired += update_package_if_needed (
106130 package , new_data , batch , set_of_update_fields
107131 )
@@ -163,7 +187,12 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
163187 total_repaired += len (batch )
164188 PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
165189
166- return total_repaired , pkgs_not_repaired
190+ if metadata_batch :
191+ not_repaired = _process_metadata_batch (metadata_batch )
192+ pkgs_metadata_not_repaired .update (not_repaired )
193+ total_metadata_repaired += len (metadata_batch ) - len (not_repaired )
194+
195+ return total_repaired , pkgs_not_repaired , total_metadata_repaired , pkgs_metadata_not_repaired
167196
168197
169198def update_package_if_needed (
@@ -202,3 +231,89 @@ def update_package_if_needed(
202231 set_of_update_fields .clear ()
203232
204233 return total_repaired
234+
235+
def update_metadata_artifact_if_needed(
    package: PythonPackageContent,
    new_metadata_sha256: str | None,
    main_artifact: Artifact,
    metadata_batch: list[tuple],
    pkgs_metadata_not_repaired: set[str],
) -> int:
    """
    Queue a wheel package for metadata-artifact repair when it needs one.

    A package is queued when it is a wheel (``.whl``) with a known
    ``new_metadata_sha256`` and either:

    - it has no ContentArtifact whose relative path ends with ``.metadata``, or
    - ``new_metadata_sha256`` differs from the package's stored
      ``metadata_sha256`` and the existing metadata ContentArtifact has no
      artifact or an artifact with a different sha256.

    Queued entries are processed in bulk once the batch reaches BULK_SIZE.

    Args:
        package: Package to check for metadata changes.
        new_metadata_sha256: The metadata_sha256 extracted from the main artifact, or None.
        main_artifact: The main package artifact used to generate metadata.
        metadata_batch: List of (package, main_artifact) tuples awaiting batch
            processing (updated in-place; cleared after a flush).
        pkgs_metadata_not_repaired: Set of package PKs that failed repair (updated in-place).

    Returns:
        Number of metadata artifacts repaired by this call. This is non-zero only
        when the call triggered a batch flush; otherwise 0.
    """
    # Only wheels with a known metadata digest are candidates for repair.
    if not package.filename.endswith(".whl") or not new_metadata_sha256:
        return 0

    # A single lookup answers both "does a metadata ContentArtifact exist?" and
    # "which one is it?", instead of evaluating the queryset and then querying
    # again with .first().
    metadata_ca = package.contentartifact_set.filter(
        relative_path__endswith=".metadata"
    ).first()

    if metadata_ca is None:
        # Create missing
        needs_repair = True
    elif new_metadata_sha256 != package.metadata_sha256:
        # Fix existing, unless the stored artifact already matches the new digest
        metadata_artifact = metadata_ca.artifact
        needs_repair = (
            metadata_artifact is None or metadata_artifact.sha256 != new_metadata_sha256
        )
    else:
        needs_repair = False

    if needs_repair:
        metadata_batch.append((package, main_artifact))

    # ">=" rather than "==": the list is owned by the caller, so a batch that is
    # already over the threshold must still be flushed.
    if len(metadata_batch) < BULK_SIZE:
        return 0

    # Capture the size before clearing so the count reflects the real batch length.
    batch_size = len(metadata_batch)
    not_repaired = _process_metadata_batch(metadata_batch)
    pkgs_metadata_not_repaired.update(not_repaired)
    metadata_batch.clear()

    return batch_size - len(not_repaired)
283+
284+
def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
    """
    Processes a batch of metadata repair operations by creating metadata artifacts
    and their corresponding ContentArtifacts.

    Each package is handled at most once per batch, even if it was queued more
    than once. ContentArtifacts are written with a single bulk upsert keyed on
    (content, relative_path); on conflict only the ``artifact`` field is updated.

    Args:
        metadata_batch: List of (package, main_artifact) tuples.

    Returns:
        Set of package PKs for which metadata artifacts could not be created.
    """
    not_repaired = set()
    content_artifacts = []
    seen_pks = set()

    for package, main_artifact in metadata_batch:
        # The same (content, relative_path) pair must not appear twice in one
        # upsert statement: backends such as PostgreSQL reject an
        # ON CONFLICT DO UPDATE that would touch the same row a second time.
        # Skipping repeats also avoids deriving the same metadata artifact twice.
        if package.pk in seen_pks:
            continue
        seen_pks.add(package.pk)

        metadata_artifact = artifact_to_metadata_artifact(package.filename, main_artifact)
        if not metadata_artifact:
            not_repaired.add(package.pk)
            continue

        content_artifacts.append(
            ContentArtifact(
                artifact=metadata_artifact,
                content=package,
                relative_path=f"{package.filename}.metadata",
            )
        )

    # Skip the database round trip entirely when nothing could be built.
    if content_artifacts:
        ContentArtifact.objects.bulk_create(
            content_artifacts,
            update_conflicts=True,
            update_fields=["artifact"],
            unique_fields=["content", "relative_path"],
        )

    return not_repaired
0 commit comments