Skip to content

Commit cd76fa5

Browse files
mprpic and claude committed
feat(sbom): build purls with packageurl-python and add upstream source identification
Use the packageurl-python library to construct purls instead of building them manually as strings.

Introduce a PurlConfig model that consolidates all purl-related per-package settings into a single field. When set to a string, it is used as the full downstream purl. When set to a PurlConfig object, individual fields (type, namespace, name, version, repository_url, upstream) override specific purl components, and the remaining components default to the global SbomSettings.

Add upstream source identification to the SBOM. Each document now contains two package entries linked by a GENERATED_FROM relationship:

- SPDXRef-wheel: the downstream wheel, with the repository_url qualifier
- SPDXRef-upstream: the original source package, without qualifiers

The upstream purl is auto-derived by stripping repository_url from the downstream purl. For packages sourced from GitHub/GitLab, an explicit upstream purl can be set via PurlConfig.upstream.

Add repository_url to SbomSettings as a global purl qualifier (e.g. ?repository_url=https://packages.redhat.com) that is added to every downstream purl. A per-package PurlConfig.repository_url overrides it.

Refactor tests to use static test data for both global and per-package settings files.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Martin Prpič <mprpic@redhat.com>
1 parent c5c3afc commit cd76fa5

File tree

9 files changed

+266
-76
lines changed

9 files changed

+266
-76
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies = [
3535
"elfdeps>=0.2.0",
3636
"license-expression",
3737
"packaging",
38+
"packageurl-python",
3839
"psutil",
3940
"pydantic",
4041
"pypi_simple",

src/fromager/packagesettings/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
GitOptions,
88
PackageSettings,
99
ProjectOverride,
10+
PurlConfig,
1011
ResolverDist,
1112
SbomSettings,
1213
VariantInfo,
@@ -46,6 +47,7 @@
4647
"PackageVersion",
4748
"PatchMap",
4849
"ProjectOverride",
50+
"PurlConfig",
4951
"RawAnnotations",
5052
"ResolverDist",
5153
"SbomSettings",

src/fromager/packagesettings/_models.py

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ class SbomSettings(pydantic.BaseModel):
3737
sbom:
3838
supplier: "Organization: ExampleCo"
3939
namespace: "https://www.example.com"
40+
purl_type: pypi
41+
repository_url: "https://example.com/simple"
4042
creators:
4143
- "Organization: ExampleCo"
4244
"""
@@ -55,6 +57,64 @@ class SbomSettings(pydantic.BaseModel):
5557
The fromager tool creator entry is always added automatically.
5658
"""
5759

60+
purl_type: str = "pypi"
61+
"""Default purl type for all packages (e.g. ``pypi``, ``generic``)"""
62+
63+
repository_url: str | None = None
64+
"""Default purl ``repository_url`` qualifier for all packages
65+
66+
When set, this URL is added to every purl as a qualifier
67+
(e.g. ``pkg:pypi/flask@2.0?repository_url=https://example.com/simple``).
68+
Can be overridden per-package in the package settings file.
69+
"""
70+
71+
72+
class PurlConfig(pydantic.BaseModel):
    """Per-package purl configuration for SBOM generation.

    Allows overriding individual purl components or specifying an
    upstream purl for packages sourced from GitHub/GitLab.

    Every field is optional. When building the downstream purl, an unset
    (or empty) ``type`` or ``repository_url`` falls back to the global
    ``sbom`` settings, and an unset ``name`` or ``version`` falls back to
    the package name and the resolved version. ``namespace`` has no
    global default and is omitted from the purl when unset.

    ::

        purl:
          type: generic
          name: custom-name
          repository_url: "https://example.com/simple"
          upstream: "pkg:github/org/repo@v1.0.0"
    """

    # MODEL_CONFIG is defined outside the visible part of this module.
    model_config = MODEL_CONFIG

    type: str | None = None
    """Override the purl type (e.g. ``generic`` instead of ``pypi``)"""

    namespace: str | None = None
    """Override the purl namespace component"""

    name: str | None = None
    """Override the purl name component (defaults to the package name)"""

    version: str | None = None
    """Override the purl version component (defaults to the resolved version)"""

    repository_url: str | None = None
    """Per-package override for the purl ``repository_url`` qualifier.

    Overrides the global ``sbom.repository_url`` setting for this package.
    """

    # The value is parsed with ``PackageURL.from_string`` when the SBOM is
    # generated, so it must be a complete, valid purl string.
    upstream: str | None = None
    """Full purl string identifying the upstream source package.

    When set, this is used as the upstream identity in the SBOM's
    GENERATED_FROM relationship. Used for packages sourced from
    GitHub/GitLab rather than PyPI.

    When absent, the upstream purl is auto-derived from the downstream
    purl without the ``repository_url`` qualifier.
    """
117+
58118

59119
class ResolverDist(pydantic.BaseModel):
60120
"""Packages resolver dist
@@ -351,12 +411,11 @@ class PackageSettings(pydantic.BaseModel):
351411
download_source: DownloadSource = Field(default_factory=DownloadSource)
352412
"""Alternative source download settings"""
353413

354-
purl: str | None = None
355-
"""Package URL (purl) override for SBOM generation
414+
purl: PurlConfig | None = None
415+
"""Purl configuration for SBOM generation.
356416
357-
When set, this value is used instead of the default ``pkg:pypi/<name>@<version>``
358-
purl. Useful for packages that are not on PyPI or are midstream forks.
359-
Supports ``{name}`` and ``{version}`` format substitution.
417+
A ``PurlConfig`` object with individual field overrides and upstream
418+
source identification.
360419
"""
361420

362421
resolver_dist: ResolverDist = Field(default_factory=ResolverDist)

src/fromager/packagesettings/_pbi.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
GitOptions,
1919
PackageSettings,
2020
ProjectOverride,
21+
PurlConfig,
2122
VariantInfo,
2223
)
2324
from ._templates import _resolve_template, substitute_template
@@ -70,8 +71,8 @@ def variant(self) -> Variant:
7071
return self._variant
7172

7273
@property
73-
def purl(self) -> str | None:
74-
"""Package URL (purl) override for SBOM generation."""
74+
def purl_config(self) -> PurlConfig | None:
75+
"""Per-package purl configuration for SBOM generation."""
7576
return self._ps.purl
7677

7778
@property

src/fromager/sbom.py

Lines changed: 99 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,39 +13,73 @@
1313
import typing
1414
from datetime import UTC, datetime
1515

16+
from packageurl import PackageURL
1617
from packaging.requirements import Requirement
1718
from packaging.utils import canonicalize_name
1819
from packaging.version import Version
1920

2021
if typing.TYPE_CHECKING:
2122
from . import context
23+
from .packagesettings import PackageBuildInfo, SbomSettings
2224

2325
logger = logging.getLogger(__name__)
2426

2527
SBOM_FILENAME = "fromager.spdx.json"
2628

2729

28-
def _build_purl(
30+
def _build_downstream_purl(
2931
*,
30-
package_name: str,
31-
package_version: Version,
32-
purl_override: str | None,
33-
) -> str:
34-
"""Build a package URL for the SBOM.
35-
36-
Returns ``pkg:pypi/<name>@<version>`` by default. If a purl override
37-
is set in per-package settings, it is used instead with
38-
``str.format()`` substitution for ``{name}`` and ``{version}``.
32+
name: str,
33+
version: Version,
34+
pbi: PackageBuildInfo,
35+
sbom_settings: SbomSettings,
36+
) -> PackageURL:
37+
"""Build the downstream package URL for the wheel.
38+
39+
A purl is constructed from ``PurlConfig`` field overrides
40+
(per-package) falling back to global defaults.
3941
"""
40-
if purl_override:
41-
try:
42-
return purl_override.format(name=package_name, version=package_version)
43-
except (KeyError, ValueError) as err:
44-
raise ValueError(
45-
f"invalid purl template {purl_override!r}: "
46-
"only {name} and {version} are supported"
47-
) from err
48-
return f"pkg:pypi/{package_name}@{package_version}"
42+
pc = pbi.purl_config
43+
purl_type = (pc.type if pc else None) or sbom_settings.purl_type
44+
qualifiers: dict[str, str] = {}
45+
repo_url = (pc.repository_url if pc else None) or sbom_settings.repository_url
46+
if repo_url:
47+
qualifiers["repository_url"] = repo_url
48+
49+
return PackageURL(
50+
type=purl_type,
51+
namespace=pc.namespace if pc else None,
52+
name=(pc.name if pc else None) or name,
53+
version=(pc.version if pc else None) or str(version),
54+
qualifiers=qualifiers or None,
55+
)
56+
57+
58+
def _build_upstream_purl(
    *,
    name: str,
    version: Version,
    pbi: PackageBuildInfo,
    sbom_settings: SbomSettings,
) -> PackageURL:
    """Build the upstream source package URL.

    If ``upstream`` is set in the per-package ``PurlConfig``, it is
    parsed and used as-is. Otherwise, the upstream purl is built from
    the same components as the downstream purl (per-package overrides
    falling back to the global purl type, the package name and the
    resolved version) but without the ``repository_url`` qualifier.

    Raises ``ValueError`` if the configured ``upstream`` value is not a
    valid purl string.
    """
    pc = pbi.purl_config
    if pc and pc.upstream:
        try:
            return PackageURL.from_string(pc.upstream)
        except ValueError as err:
            # Name the package and the offending value so a bad settings
            # entry can be located; the library error alone has neither.
            raise ValueError(
                f"{name}: invalid upstream purl {pc.upstream!r}: {err}"
            ) from err

    # ``or`` (not ``is None``) so that empty strings in the per-package
    # config also fall back to the defaults.
    purl_type = (pc.type if pc else None) or sbom_settings.purl_type
    return PackageURL(
        type=purl_type,
        namespace=pc.namespace if pc else None,
        name=(pc.name if pc else None) or name,
        version=(pc.version if pc else None) or str(version),
    )
4983

5084

5185
def generate_sbom(
@@ -56,8 +90,9 @@ def generate_sbom(
5690
) -> dict[str, typing.Any]:
5791
"""Generate a minimal SPDX 2.3 JSON document for a wheel.
5892
59-
The document contains the wheel as the primary package and a
60-
DESCRIBES relationship from the document to the package.
93+
The document contains the downstream wheel as the primary package,
94+
the upstream source as a second package, and DESCRIBES /
95+
GENERATED_FROM relationships.
6196
"""
6297
sbom_settings = ctx.settings.sbom_settings
6398
if sbom_settings is None:
@@ -73,26 +108,48 @@ def generate_sbom(
73108

74109
namespace = f"{sbom_settings.namespace}/{name}-{version}.spdx.json"
75110

76-
package_entry: dict[str, typing.Any] = {
111+
downstream = _build_downstream_purl(
112+
name=name,
113+
version=version,
114+
pbi=pbi,
115+
sbom_settings=sbom_settings,
116+
)
117+
upstream = _build_upstream_purl(
118+
name=name,
119+
version=version,
120+
pbi=pbi,
121+
sbom_settings=sbom_settings,
122+
)
123+
124+
wheel_entry: dict[str, typing.Any] = {
77125
"SPDXID": "SPDXRef-wheel",
78-
"name": name,
79-
"versionInfo": str(version),
126+
"name": downstream.name,
127+
"versionInfo": downstream.version or str(version),
80128
"downloadLocation": "NOASSERTION",
81129
"supplier": sbom_settings.supplier,
130+
"externalRefs": [
131+
{
132+
"referenceCategory": "PACKAGE-MANAGER",
133+
"referenceType": "purl",
134+
"referenceLocator": downstream.to_string(),
135+
}
136+
],
82137
}
83138

84-
purl = _build_purl(
85-
package_name=name,
86-
package_version=version,
87-
purl_override=pbi.purl,
88-
)
89-
package_entry["externalRefs"] = [
90-
{
91-
"referenceCategory": "PACKAGE-MANAGER",
92-
"referenceType": "purl",
93-
"referenceLocator": purl,
94-
}
95-
]
139+
upstream_entry: dict[str, typing.Any] = {
140+
"SPDXID": "SPDXRef-upstream",
141+
"name": upstream.name,
142+
"versionInfo": upstream.version or str(version),
143+
"downloadLocation": "NOASSERTION",
144+
"supplier": "NOASSERTION",
145+
"externalRefs": [
146+
{
147+
"referenceCategory": "PACKAGE-MANAGER",
148+
"referenceType": "purl",
149+
"referenceLocator": upstream.to_string(),
150+
}
151+
],
152+
}
96153

97154
doc: dict[str, typing.Any] = {
98155
"spdxVersion": "SPDX-2.3",
@@ -104,13 +161,18 @@ def generate_sbom(
104161
"created": timestamp,
105162
"creators": creators,
106163
},
107-
"packages": [package_entry],
164+
"packages": [wheel_entry, upstream_entry],
108165
"relationships": [
109166
{
110167
"spdxElementId": "SPDXRef-DOCUMENT",
111168
"relationshipType": "DESCRIBES",
112169
"relatedSpdxElement": "SPDXRef-wheel",
113170
},
171+
{
172+
"spdxElementId": "SPDXRef-wheel",
173+
"relationshipType": "GENERATED_FROM",
174+
"relatedSpdxElement": "SPDXRef-upstream",
175+
},
114176
],
115177
}
116178
return doc

tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def testdata_context(
8686
def make_sbom_ctx(
8787
tmp_path: pathlib.Path,
8888
sbom_settings: SbomSettings | None = None,
89-
purl: str | None = None,
89+
package_overrides: dict[str, typing.Any] | None = None,
9090
) -> context.WorkContext:
9191
"""Create a minimal WorkContext with SBOM settings."""
9292
settings_file = packagesettings.SettingsFile(sbom=sbom_settings)
@@ -97,10 +97,10 @@ def make_sbom_ctx(
9797
variant="cpu",
9898
max_jobs=None,
9999
)
100-
if purl is not None:
100+
if package_overrides is not None:
101101
ps = packagesettings.PackageSettings.from_mapping(
102102
"test-pkg",
103-
{"purl": purl},
103+
package_overrides,
104104
source="test",
105105
has_config=True,
106106
)

0 commit comments

Comments
 (0)