From 31ca84c8cbddcc4a84b8d4eeb12ce29273cda703 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 10 Apr 2026 18:31:11 -0700 Subject: [PATCH 1/5] Add function for creating api package metadata file paths Signed-off-by: Jono Yang --- src/aboutcode/federated/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/aboutcode/federated/__init__.py b/src/aboutcode/federated/__init__.py index dcd29cf..a111d2d 100644 --- a/src/aboutcode/federated/__init__.py +++ b/src/aboutcode/federated/__init__.py @@ -383,6 +383,7 @@ KIND_PURLS_FILENAME = "purls.yml" KIND_VULNERABILITIES_FILENAME = "vulnerabilities.yml" +KIND_API_PACKAGE_METADATA_FILENAME = "api_package_metadata.yml" def get_package_purls_yml_file_path(purl: Union[PackageURL, str]): @@ -399,6 +400,13 @@ def get_package_vulnerabilities_yml_file_path(purl: Union[PackageURL, str]): return get_package_base_dir(purl) / KIND_VULNERABILITIES_FILENAME +def get_api_package_metadata_file_path(purl: Union[PackageURL, str]): + """ + Return the path to a Package api_package_metadata.yml YAML for a purl. + """ + return get_package_base_dir(purl) / KIND_API_PACKAGE_METADATA_FILENAME + + def get_package_base_dir(purl: Union[PackageURL, str]): """ Return the base path to a Package directory (ignoring version) for a purl @@ -586,7 +594,7 @@ def from_yaml_config( if data["name"] != name: raise TypeError( - f"Inconsistent federation name {name!r} with YAML config text: {text!r}" + f"Inconsistent federation name {name!r} " f"with YAML config text: {text!r}" ) lrd = local_root_dir and Path(local_root_dir) or None @@ -930,7 +938,7 @@ def __post_init__(self): ) if not is_valid_power_of_two(self.number_of_dirs): - raise TypeError(f"number_of_dirs must be a power of 2, not {self.number_of_dirs!r}") + raise TypeError(f"number_of_dirs must be a power of 2, " f"not {self.number_of_dirs!r}") if not self.number_of_repos or self.number_of_repos > self.number_of_dirs: raise TypeError( @@ -939,7 +947,9 @@ def __post_init__(self): ) if not is_valid_power_of_two(self.number_of_repos): - raise TypeError(f"number_of_repos must be a power of 2, not {self.number_of_repos!r}") + raise TypeError( + f"number_of_repos must be a power of 2, " f"not {self.number_of_repos!r}" + ) @property def numbers_of_dirs_per_repo(self) -> int: From 022e8c6ed71638aa7471f4ba63607545a2524e22 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 14 Apr 2026 14:55:15 -0700 Subject: [PATCH 2/5] Add function to create api package version response file path * Update github actions Signed-off-by: Jono Yang --- .github/workflows/docs-ci.yml | 2 ++ .github/workflows/pypi-release.yml | 16 ++++++++++------ src/aboutcode/federated/__init__.py | 10 +++++++++- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 0d8907d..fd407fd 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -5,6 +5,8 @@ on: [push, pull_request] jobs: build: runs-on: ubuntu-24.04 + permissions: + contents: read strategy: max-parallel: 4 diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 1fb1749..e0f29d0 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -42,7 +42,7 @@ jobs: run: python -m twine check dist/* - name: Upload built archives - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: pypi_archives path: dist/* @@ -56,15 +56,16 @@ jobs: steps: - name: Download built archives - uses: actions/download-artifact@v4 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: pypi_archives path: dist - name: Create GH release - uses: softprops/action-gh-release@v2 + uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0 with: draft: true + generate_release_notes: true files: dist/* @@ -79,11 +80,14 @@ jobs: steps: - name: Download built archives - uses: actions/download-artifact@v4 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: pypi_archives path: dist - name: Publish to PyPI - if: startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file + if: startsWith(github.ref, 'refs/tags/') + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 + with: + verbose: true + password: ${{ secrets.PYPI_API_TOKEN_ABOUTCODE_FEDERATED }} diff --git a/src/aboutcode/federated/__init__.py b/src/aboutcode/federated/__init__.py index a111d2d..e9a1c67 100644 --- a/src/aboutcode/federated/__init__.py +++ b/src/aboutcode/federated/__init__.py @@ -383,7 +383,8 @@ KIND_PURLS_FILENAME = "purls.yml" KIND_VULNERABILITIES_FILENAME = "vulnerabilities.yml" -KIND_API_PACKAGE_METADATA_FILENAME = "api_package_metadata.yml" +KIND_API_PACKAGE_METADATA_FILENAME = "api_package_metadata.json" +KIND_API_VERSION_RESPONSE_FILENAME = "api_package_version_response.json" def get_package_purls_yml_file_path(purl: Union[PackageURL, str]): @@ -407,6 +408,13 @@ def get_api_package_metadata_file_path(purl: Union[PackageURL, str]): return get_package_base_dir(purl) / KIND_API_PACKAGE_METADATA_FILENAME +def get_api_package_version_response_file_path(purl: Union[PackageURL, str]): + """ + Return the path to a Package api_package_version_response.yml YAML for a purl. + """ + return get_package_base_dir(purl) / KIND_API_VERSION_RESPONSE_FILENAME + + def get_package_base_dir(purl: Union[PackageURL, str]): """ Return the base path to a Package directory (ignoring version) for a purl From 62405bef0d00a8b6aa8270fe70677bf44ff96d90 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Fri, 17 Apr 2026 17:57:44 -0700 Subject: [PATCH 3/5] Be able to specify which branch of a DataFederation repo to clone Signed-off-by: Jono Yang --- src/aboutcode/federated/__init__.py | 88 +++++++++++++++++------------ 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/src/aboutcode/federated/__init__.py b/src/aboutcode/federated/__init__.py index e9a1c67..bc4e96d 100644 --- a/src/aboutcode/federated/__init__.py +++ b/src/aboutcode/federated/__init__.py @@ -566,6 +566,7 @@ def from_url( name: str, remote_root_url: str, local_root_dir: Path = None, + branch: str = "main", ) -> "DataFederation": """ Return a DataFederation loaded from a remote configuration file. @@ -574,6 +575,7 @@ def from_url( remote_root_url=remote_root_url, federation_name=name, config_filename=cls.CONFIG_FILENAME, + branch=branch, ) headers = {"User-Agent": "AboutCode/FederatedCode"} response = requests.get(url=rcf_url, headers=headers) @@ -602,7 +604,7 @@ def from_yaml_config( if data["name"] != name: raise TypeError( - f"Inconsistent federation name {name!r} " f"with YAML config text: {text!r}" + f"Inconsistent federation name {name!r} with YAML config text: {text!r}" ) lrd = local_root_dir and Path(local_root_dir) or None @@ -715,6 +717,9 @@ class DataCluster: # this is the name of cluster data_kind: str + # The filename used when saving data. + datafile_name: str + # a URI template to build the path to the datafile for this data kind. # this is the path relative to the root of a cluster directory. It does not # include directory and repository. @@ -800,6 +805,7 @@ def from_dict(cls, data: dict) -> "DataCluster": return cls( data_kind=data["data_kind"], + datafile_name=data.get("datafile_name"), datafile_path_template=data.get("datafile_path_template"), purl_type_configs=ptcs, data_schema_url=data.get("data_schema_url"), @@ -812,6 +818,7 @@ def from_dict(cls, data: dict) -> "DataCluster": def to_dict(self): return dict( data_kind=self.data_kind, + datafile_name=self.datafile_name, datafile_path_template=self.datafile_path_template, purl_type_configs=[pt.to_dict() for pt in self.purl_type_configs], data_schema_url=self.data_schema_url, @@ -834,19 +841,6 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str: """ raise NotImplementedError() - purl = as_purl(purl) - # FIXME: create as member - purl_type_config_by_type = {ptc.purl_type: ptc for ptc in self.purl_type_configs} - purl_type_config = purl_type_config_by_type(purl.type, self.default_config()) - - ppe = package_path_elements(purl, max_value=purl_type_config.number_of_dirs) - purl_hash, core_path, version, extra_path = ppe - - direct_url = None - # construct a path based on path template - # construct a URL - return direct_url - def get_local_datafile(self, purl: Union[str, PackageURL]) -> LocalDataFile: """ Return a LocalDataFile of the data kind stored in this cluster given a @@ -862,7 +856,7 @@ def get_config(self, purl_type: str) -> "PurlTypeConfig": return self._configs_by_purl_type["default"] return self._configs_by_purl_type[purl_type] - def get_datafile_relative_path(self, purl: Union[str, PackageURL]) -> str: + def get_datafile_relative_path(self, purl: Union[str, PackageURL], datafile_name=None) -> str: """ Return the datfile path relative to the root of a cluster directory given a PURL. @@ -874,11 +868,15 @@ def get_datafile_relative_path(self, purl: Union[str, PackageURL]) -> str: f"DataCluster '{self.data_kind}' needs PackageURL with version to generate path." ) + if not datafile_name: + datafile_name = self.datafile_name + template = uritemplate.URITemplate(self.datafile_path_template) return template.expand( namespace=purl.namespace, name=purl.name, version=purl.version, + datafile_name=datafile_name, ) def get_repo_and_dir_hash(self, purl: Union[str, PackageURL]) -> Tuple[str, str]: @@ -892,14 +890,16 @@ def get_repo_and_dir_hash(self, purl: Union[str, PackageURL]) -> Tuple[str, str] repo_hash = purl_hash - (purl_hash % ptc.numbers_of_dirs_per_repo) return f"{repo_hash:04}", purl_hashid - def get_datafile_repo_and_path(self, purl: Union[str, PackageURL]) -> Tuple[str, str]: + def get_datafile_repo_and_path( + self, purl: Union[str, PackageURL], datafile_name=None + ) -> Tuple[str, str]: """ Return the repository name and relative path to the datafile of the data kind stored in this cluster given a PURL. """ purl = as_purl(purl) repo_hash, dir_hash = self.get_repo_and_dir_hash(purl) - relative_datafile_path = self.get_datafile_relative_path(purl) + relative_datafile_path = self.get_datafile_relative_path(purl, datafile_name=datafile_name) directory_name = f"{purl.type}-{dir_hash}" repository_name = f"{self.data_kind}-{purl.type}-{repo_hash}" @@ -946,7 +946,7 @@ def __post_init__(self): ) if not is_valid_power_of_two(self.number_of_dirs): - raise TypeError(f"number_of_dirs must be a power of 2, " f"not {self.number_of_dirs!r}") + raise TypeError(f"number_of_dirs must be a power of 2, not {self.number_of_dirs!r}") if not self.number_of_repos or self.number_of_repos > self.number_of_dirs: raise TypeError( @@ -955,9 +955,7 @@ def __post_init__(self): ) if not is_valid_power_of_two(self.number_of_repos): - raise TypeError( - f"number_of_repos must be a power of 2, " f"not {self.number_of_repos!r}" - ) + raise TypeError(f"number_of_repos must be a power of 2, not {self.number_of_repos!r}") @property def numbers_of_dirs_per_repo(self) -> int: @@ -1143,7 +1141,8 @@ def cluster_preset(): DataCluster( data_kind="purls", description="List of fully qualified PURL strings for a package, sorted by version.", - datafile_path_template="{/namespace}/{name}/purls.yml", + datafile_name="purls.yml", + datafile_path_template="{/namespace}/{name}/{datafile_name}", purl_type_configs=PurlTypeConfig.small_size_configs(), data_schema_url="", documentation_url="https://github.com/package-url/purl-spec/", @@ -1155,7 +1154,8 @@ def cluster_preset(): "Each datafile path and schema is PURL type-specific " "and not documented here.", # FIXME: a POM is in XML, some metadata files may be code - datafile_path_template="", + datafile_name="api_package_metadata.json", + datafile_path_template="{/namespace}/{name}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1167,7 +1167,8 @@ def cluster_preset(): "Each datafile path and schema is PURL type-specific " "and not documented here.", # FIXME: a POM is in XML, some metadata files may be code - datafile_path_template="", + datafile_name="api_package_version_responses.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1177,7 +1178,8 @@ def cluster_preset(): data_kind="purldb", description="PurlDB normalized metadata datafiles for each package " "versions. Does not include fingerprints and symbols.", - datafile_path_template="{/namespace}/{name}/{version}/purldb.json", + datafile_name="purldb.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1188,7 +1190,8 @@ def cluster_preset(): data_kind="vulnerabilities", description="VulnerableCode vulnerabilities for each package. " "Also includes a separate vulnerabilities directory/", - datafile_path_template="{/namespace}/{name}/vulnerabilities.json", + datafile_name="vulnerabilities.json", + datafile_path_template="{/namespace}/{name}/{datafile_name}", purl_type_configs=[PurlTypeConfig.default_config()], data_schema_url="", documentation_url="", @@ -1197,7 +1200,8 @@ def cluster_preset(): DataCluster( data_kind="security_advisories", description="VulnerableCode security advisories for each package version.", - datafile_path_template="{/namespace}/{name}/{version}/advisories.yml", + datafile_name="advisories.yml", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=[PurlTypeConfig.default_config()], data_schema_url="", documentation_url="", @@ -1206,7 +1210,8 @@ def cluster_preset(): DataCluster( data_kind="scancode_toolkit_scans", description="scancode toolkit scans for each package version.", - datafile_path_template="{/namespace}/{name}/{version}/scancode-toolkit.json", + datafile_name="scancode-toolkit.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1215,7 +1220,8 @@ def cluster_preset(): DataCluster( data_kind="scancode_fingerprints", description="scancode_fingerprints for each package version.", - datafile_path_template="{/namespace}/{name}/{version}/scancode-fingerprints.json", + datafile_name="scancode-fingerprints.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1224,7 +1230,8 @@ def cluster_preset(): DataCluster( data_kind="cyclonedx14_sboms", description="CycloneDX v1.4 sboms for each package version", - datafile_path_template="{/namespace}/{name}/{version}/cyclonedx-14.json", + datafile_name="cyclonedx-14.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1233,7 +1240,8 @@ def cluster_preset(): DataCluster( data_kind="cyclonedx15_sboms", description="CycloneDX v1.5 sboms for each package version", - datafile_path_template="{/namespace}/{name}/{version}/cyclonedx-15.json", + datafile_name="cyclonedx-15.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1242,7 +1250,8 @@ def cluster_preset(): DataCluster( data_kind="cyclonedx16_sboms", description="CycloneDX v1.6 sboms for each package version", - datafile_path_template="{/namespace}/{name}/{version}/cyclonedx-16.json", + datafile_name="cyclonedx-16.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1251,7 +1260,8 @@ def cluster_preset(): DataCluster( data_kind="spdx2_sboms", description="SPDX version 2.x sboms for each package version", - datafile_path_template="{/namespace}/{name}/{version}/spdx-2.json", + datafile_name="spdx-2.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1260,7 +1270,8 @@ def cluster_preset(): DataCluster( data_kind="atom_slices", description="Atom slices for each package version", - datafile_path_template="{/namespace}/{name}/{version}/atom.json", + datafile_name="atom.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1270,7 +1281,8 @@ def cluster_preset(): data_kind="atom_vulnerable_slices", description="Atom vulnerable_slices for each vulnerable package version", # FIXME: need to qualify these with an advisory / CVE? - datafile_path_template="{/namespace}/{name}/{version}/atom-vulnerable.json", + datafile_name="atom-vulnerable.json", + datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", documentation_url="", @@ -1280,7 +1292,8 @@ def cluster_preset(): data_kind="openssf_security_scorecards", description="OpenSSf security_scorecards for package", # FIXME: need to qualify these with an advisory / CVE? - datafile_path_template="{/namespace}/{name}/security_scorecard.json", + datafile_name="security_scorecard.json", + datafile_path_template="{/namespace}/{name}/{datafile_name}", purl_type_configs=PurlTypeConfig.medium_size_configs(), data_schema_url="", documentation_url="", @@ -1433,6 +1446,7 @@ def build_direct_federation_config_file_url( remote_root_url: str, federation_name: str, config_filename: str, + branch: str, ): """ Return the URL to download a remote config file for a federation @@ -1441,7 +1455,7 @@ def build_direct_federation_config_file_url( root_url=remote_root_url, repo=federation_name, path=config_filename, - branch="main", + branch=branch, ) From 6e80db49db215bac2686cb6ac1498f33f8a39da6 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 12 May 2026 16:06:36 -0700 Subject: [PATCH 4/5] Update test expectations Signed-off-by: Jono Yang --- src/aboutcode/federated/__init__.py | 20 +++++--- .../foo/aboutcode-federated-config.yml | 47 ++++++++++++------- tests/test_federated.py | 2 +- 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/aboutcode/federated/__init__.py b/src/aboutcode/federated/__init__.py index bc4e96d..3d892f6 100644 --- a/src/aboutcode/federated/__init__.py +++ b/src/aboutcode/federated/__init__.py @@ -403,14 +403,14 @@ def get_package_vulnerabilities_yml_file_path(purl: Union[PackageURL, str]): def get_api_package_metadata_file_path(purl: Union[PackageURL, str]): """ - Return the path to a Package api_package_metadata.yml YAML for a purl. + Return the path to a Package api_package_metadata.json file for a purl. """ return get_package_base_dir(purl) / KIND_API_PACKAGE_METADATA_FILENAME def get_api_package_version_response_file_path(purl: Union[PackageURL, str]): """ - Return the path to a Package api_package_version_response.yml YAML for a purl. + Return the path to a Package api_package_version_response.json file for a purl. """ return get_package_base_dir(purl) / KIND_API_VERSION_RESPONSE_FILENAME @@ -488,12 +488,14 @@ def remote_config_file_url( cls, remote_root_url: str, federation_name: str, + branch: str = "main", ): """Return a URL to directly download the federation config file""" return build_direct_federation_config_file_url( remote_root_url=remote_root_url, federation_name=federation_name, config_filename=cls.CONFIG_FILENAME, + branch=branch, ) @property @@ -856,7 +858,9 @@ def get_config(self, purl_type: str) -> "PurlTypeConfig": return self._configs_by_purl_type["default"] return self._configs_by_purl_type[purl_type] - def get_datafile_relative_path(self, purl: Union[str, PackageURL], datafile_name=None) -> str: + def get_datafile_relative_path( + self, purl: Union[str, PackageURL], datafile_name: str = None + ) -> str: """ Return the datfile path relative to the root of a cluster directory given a PURL. @@ -891,7 +895,9 @@ def get_repo_and_dir_hash(self, purl: Union[str, PackageURL]) -> Tuple[str, str] return f"{repo_hash:04}", purl_hashid def get_datafile_repo_and_path( - self, purl: Union[str, PackageURL], datafile_name=None + self, + purl: Union[str, PackageURL], + datafile_name: str = None, ) -> Tuple[str, str]: """ Return the repository name and relative path to the datafile of the data kind stored @@ -1162,12 +1168,12 @@ def cluster_preset(): data_license="CC-BY-4.0", ), DataCluster( - data_kind="api_package_version_responses", + data_kind="api_package_version_response", description="Raw API response datafiles for a package versions. " "Each datafile path and schema is PURL type-specific " "and not documented here.", # FIXME: a POM is in XML, some metadata files may be code - datafile_name="api_package_version_responses.json", + datafile_name="api_package_version_response.json", datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}", purl_type_configs=PurlTypeConfig.large_size_configs(), data_schema_url="", @@ -1446,7 +1452,7 @@ def build_direct_federation_config_file_url( remote_root_url: str, federation_name: str, config_filename: str, - branch: str, + branch: str = "main", ): """ Return the URL to download a remote config file for a federation diff --git a/tests/test_data/all-presets/foo/aboutcode-federated-config.yml b/tests/test_data/all-presets/foo/aboutcode-federated-config.yml index ca4204b..6524904 100644 --- a/tests/test_data/all-presets/foo/aboutcode-federated-config.yml +++ b/tests/test_data/all-presets/foo/aboutcode-federated-config.yml @@ -6,7 +6,8 @@ data_license: data_maintainers: [] data_clusters: - data_kind: api_package_metadata - datafile_path_template: + datafile_name: api_package_metadata.json + datafile_path_template: '{/namespace}/{name}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -83,8 +84,9 @@ data_clusters: documentation_url: data_license: CC-BY-4.0 data_maintainers: [] - - data_kind: api_package_version_responses - datafile_path_template: + - data_kind: api_package_version_response + datafile_name: api_package_version_response.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -162,7 +164,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: atom_slices - datafile_path_template: '{/namespace}/{name}/{version}/atom.json' + datafile_name: atom.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -239,7 +242,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: atom_vulnerable_slices - datafile_path_template: '{/namespace}/{name}/{version}/atom-vulnerable.json' + datafile_name: atom-vulnerable.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -316,7 +320,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: cyclonedx14_sboms - datafile_path_template: '{/namespace}/{name}/{version}/cyclonedx-14.json' + datafile_name: cyclonedx-14.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -393,7 +398,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: cyclonedx15_sboms - datafile_path_template: '{/namespace}/{name}/{version}/cyclonedx-15.json' + datafile_name: cyclonedx-15.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -470,7 +476,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: cyclonedx16_sboms - datafile_path_template: '{/namespace}/{name}/{version}/cyclonedx-16.json' + datafile_name: cyclonedx-16.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -547,7 +554,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: openssf_security_scorecards - datafile_path_template: '{/namespace}/{name}/security_scorecard.json' + datafile_name: security_scorecard.json + datafile_path_template: '{/namespace}/{name}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 256 @@ -624,7 +632,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: purldb - datafile_path_template: '{/namespace}/{name}/{version}/purldb.json' + datafile_name: purldb.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -702,7 +711,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: purls - datafile_path_template: '{/namespace}/{name}/purls.yml' + datafile_name: purls.yml + datafile_path_template: '{/namespace}/{name}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 128 @@ -779,7 +789,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: scancode_fingerprints - datafile_path_template: '{/namespace}/{name}/{version}/scancode-fingerprints.json' + datafile_name: scancode-fingerprints.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -856,7 +867,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: scancode_toolkit_scans - datafile_path_template: '{/namespace}/{name}/{version}/scancode-toolkit.json' + datafile_name: scancode-toolkit.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -933,7 +945,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: security_advisories - datafile_path_template: '{/namespace}/{name}/{version}/advisories.yml' + datafile_name: advisories.yml + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: default number_of_repos: 1 @@ -944,7 +957,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: spdx2_sboms - datafile_path_template: '{/namespace}/{name}/{version}/spdx-2.json' + datafile_name: spdx-2.json + datafile_path_template: '{/namespace}/{name}/{version}/{datafile_name}' purl_type_configs: - purl_type: github number_of_repos: 1024 @@ -1021,7 +1035,8 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: vulnerabilities - datafile_path_template: '{/namespace}/{name}/vulnerabilities.json' + datafile_name: vulnerabilities.json + datafile_path_template: '{/namespace}/{name}/{datafile_name}' purl_type_configs: - purl_type: default number_of_repos: 1 diff --git a/tests/test_federated.py b/tests/test_federated.py index 32b9f6c..bc2f8de 100644 --- a/tests/test_federated.py +++ b/tests/test_federated.py @@ -283,7 +283,7 @@ def test_purl_hash(purl, purl_hash): def test_federation_with_all_cluster_preset(): df = DataFederation(name="foo", data_clusters=sorted(cluster_preset().values())) local_root_dir = TEST_DATA / "all-presets" - if False: + if REGEN: df.local_root_dir = local_root_dir df.dump() df2 = DataFederation.load(name="foo", local_root_dir=local_root_dir) From 85d41668fb1a0e24dfe88ab68fb1155842051e82 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 12 May 2026 17:35:29 -0700 Subject: [PATCH 5/5] Update CHANGELOG.rst Signed-off-by: Jono Yang --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c3e9cc7..3ffc383 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,14 @@ Changelog ============= +v1.0.0 (May 12, 2026) +--------------------------- + +- Add new DataKind for ``api_package_metadata`` and + ``api_package_version_response``. Add field ``datafile_name`` to DataCluster + and modify ``datafile_path_template`` to use ``datafile_name`` instead of + specifying the file name in ``datafile_path_template``. + v0.1.0 (October 20, 2025) ---------------------------