Skip to content

Commit 4b20c18

Browse files
authored
feat: add pypi attestation discovery (#1067)
This PR adds discovery of PyPI attestation files for software components. Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent bbf80bf commit 4b20c18

File tree

21 files changed

+811
-173
lines changed

21 files changed

+811
-173
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dependencies = [
3636
"cyclonedx-python-lib[validation] >=7.3.4,<8.0.0",
3737
"beautifulsoup4 >= 4.12.0,<5.0.0",
3838
"problog >= 2.2.6,<3.0.0",
39+
"cryptography >=44.0.0,<45.0.0",
3940
]
4041
keywords = []
4142
# https://pypi.org/classifiers/

src/macaron/config/defaults.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,8 @@ inspector_url_scheme = https
542542
[deps_dev]
543543
url_netloc = api.deps.dev
544544
url_scheme = https
545-
purl_endpoint = v3alpha/purl
545+
api_endpoint = v3alpha
546+
purl_endpoint = purl
546547

547548
[osv_dev]
548549
url_netloc = api.osv.dev

src/macaron/provenance/provenance_extractor.py

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,11 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str
4343
If the extraction process fails for any reason.
4444
"""
4545
predicate_type = payload.statement.get("predicateType")
46-
if isinstance(payload, InTotoV1Payload) and predicate_type == "https://slsa.dev/provenance/v1":
47-
return _extract_from_slsa_v1(payload)
46+
if isinstance(payload, InTotoV1Payload):
47+
if predicate_type == "https://slsa.dev/provenance/v1":
48+
return _extract_from_slsa_v1(payload)
49+
if predicate_type == "https://docs.pypi.org/attestations/publish/v1":
50+
return _extract_from_pypi_v1(payload)
4851

4952
if isinstance(payload, InTotoV01Payload):
5053
if predicate_type == "https://slsa.dev/provenance/v0.2":
@@ -195,6 +198,32 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str | None, str | N
195198
return repo, commit or None
196199

197200

201+
def _extract_from_pypi_v1(payload: InTotoV1Payload) -> tuple[str | None, str | None]:
202+
"""Extract the repository and commit metadata from the passed PyPI provenance payload.
203+
204+
This payload represents a custom predicate created from the certificate of a PyPI v1 attestation file.
205+
By design, these attestations come without a predicate.
206+
207+
Parameters
208+
----------
209+
payload: InTotoV1Payload
210+
The payload to extract from.
211+
212+
Returns
213+
-------
214+
tuple[str | None, str | None]
215+
The repository URL and commit hash if found, or a pair of None values if the payload has no predicate.
216+
"""
217+
predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
218+
if not predicate:
219+
logger.debug("No predicate in payload statement.")
220+
return None, None
221+
222+
repo = json_extract(predicate, ["sourceUri"], str)
223+
digest = json_extract(predicate, ["sourceDigest"], str)
224+
return repo, digest
225+
226+
198227
def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
199228
"""Extract the repository and commit metadata from the witness provenance file found at the passed path.
200229
@@ -300,7 +329,7 @@ def check_if_input_purl_provenance_conflict(
300329
provenance_repo_url: str | None,
301330
purl: PackageURL,
302331
) -> bool:
303-
"""Test if the input repository type PURL's repo and commit match the contents of the provenance.
332+
"""Test if the input repository type PURL's repo matches the contents of the provenance.
304333
305334
Parameters
306335
----------
@@ -620,6 +649,41 @@ def get_build_invocation(self, statement: InTotoV01Statement | InTotoV1Statement
620649
return gl_workflow, gl_job_url
621650

622651

652+
class PyPICertificateDefinition(ProvenanceBuildDefinition):
653+
"""Class representing the derived PyPI certificate build definition.
654+
655+
This class implements the abstract methods from the `ProvenanceBuildDefinition`
656+
to extract build invocation details specific to the PyPI certificate build type.
657+
"""
658+
659+
#: Determines the expected ``buildType`` field in the provenance predicate.
660+
expected_build_type = "pypi_certificate"
661+
662+
def get_build_invocation(self, statement: InTotoV01Statement | InTotoV1Statement) -> tuple[str | None, str | None]:
663+
"""Retrieve the build invocation information from the given statement.
664+
665+
Parameters
666+
----------
667+
statement : InTotoV1Statement | InTotoV01Statement
668+
The provenance statement from which to extract the build invocation
669+
details. This statement contains the metadata about the build process
670+
and its associated artifacts.
671+
672+
Returns
673+
-------
674+
tuple[str | None, str | None]
675+
A tuple containing two elements:
676+
- The first element is the build invocation entry point (e.g., workflow name), or None if not found.
677+
- The second element is the invocation URL or identifier (e.g., job URL), or None if not found.
678+
"""
679+
if statement["predicate"] is None:
680+
return None, None
681+
682+
gha_workflow = json_extract(statement["predicate"], ["workflow"], str)
683+
invocation_url = json_extract(statement["predicate"], ["invocationUrl"], str)
684+
return gha_workflow, invocation_url
685+
686+
623687
class ProvenancePredicate:
624688
"""Class providing utility methods for handling provenance predicates.
625689
@@ -685,6 +749,7 @@ def find_build_def(statement: InTotoV01Statement | InTotoV1Statement) -> Provena
685749
SLSAGCBBuildDefinitionV1(),
686750
SLSAOCIBuildDefinitionV1(),
687751
WitnessGitLabBuildDefinitionV01(),
752+
PyPICertificateDefinition(),
688753
]
689754

690755
for build_def in build_defs:

src/macaron/provenance/provenance_finder.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module contains methods for finding provenance files."""
5+
import json
56
import logging
67
import os
78
import tempfile
@@ -12,6 +13,7 @@
1213

1314
from macaron.config.defaults import defaults
1415
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
16+
from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder
1517
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
1618
from macaron.slsa_analyzer.checks.provenance_available_check import ProvenanceAvailableException
1719
from macaron.slsa_analyzer.ci_service import GitHubActions
@@ -78,6 +80,10 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
7880
discovery_functions = [partial(find_gav_provenance, purl, self.jfrog_registry)]
7981
return self._find_provenance(discovery_functions)
8082

83+
if purl.type == "pypi":
84+
discovery_functions = [partial(find_pypi_provenance, purl)]
85+
return self._find_provenance(discovery_functions)
86+
8187
# TODO add other possible discovery functions.
8288
logger.debug("Provenance finding not supported for PURL type: %s", purl.type)
8389
return []
@@ -275,6 +281,37 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
275281
return provenances[:1]
276282

277283

284+
def find_pypi_provenance(purl: PackageURL) -> list[InTotoPayload]:
285+
"""Find and download the PyPI based provenance for the passed PURL.
286+
287+
Parameters
288+
----------
289+
purl: PackageURL
290+
The PURL of the analysis target.
291+
292+
Returns
293+
-------
294+
list[InTotoPayload]
295+
A single-element list containing the provenance payload if found, or an empty list otherwise.
296+
"""
297+
attestation, verified = DepsDevRepoFinder.get_attestation(purl)
298+
if not attestation:
299+
return []
300+
301+
with tempfile.TemporaryDirectory() as temp_dir:
302+
file_name = os.path.join(temp_dir, f"{purl.name}")
303+
with open(file_name, "w", encoding="utf-8") as file:
304+
json.dump(attestation, file)
305+
306+
try:
307+
payload = load_provenance_payload(file_name)
308+
payload.verified = verified
309+
return [payload]
310+
except LoadIntotoAttestationError as load_error:
311+
logger.error("Error while loading provenance: %s", load_error)
312+
return []
313+
314+
278315
def find_provenance_from_ci(
279316
analyze_ctx: AnalyzeContext, git_obj: Git | None, download_path: str
280317
) -> InTotoPayload | None:

src/macaron/provenance/provenance_verifier.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,12 @@ def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) ->
8282

8383
signed_subjects = provenance[1].statement.get("subject")
8484
if not signed_subjects:
85+
logger.debug("Missing signed subjects.")
8586
return False
8687

8788
unsigned_subjects = provenance[0].statement.get("subject")
8889
if not unsigned_subjects:
90+
logger.debug("Missing unsigned subjects.")
8991
return False
9092

9193
found_signed_subject = None
@@ -97,6 +99,7 @@ def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) ->
9799
break
98100

99101
if not found_signed_subject:
102+
logger.debug("Missing signed subject.")
100103
return False
101104

102105
found_unsigned_subject = None
@@ -108,15 +111,18 @@ def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) ->
108111
break
109112

110113
if not found_unsigned_subject:
114+
logger.debug("Missing unsigned subject.")
111115
return False
112116

113117
signed_digest = found_signed_subject.get("digest")
114118
unsigned_digest = found_unsigned_subject.get("digest")
115119
if not (signed_digest and unsigned_digest):
120+
logger.debug("Missing %ssigned digest.", "un" if signed_digest else "")
116121
return False
117122

118123
# For signed and unsigned to match, the digests must be identical.
119124
if signed_digest != unsigned_digest:
125+
logger.debug("Signed and unsigned digests do not match.")
120126
return False
121127

122128
key = list(signed_digest.keys())[0]

0 commit comments

Comments
 (0)