Skip to content

Commit c0e88a4

Browse files
committed
chore: refactor provenance level 3 check
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent f4fd86f commit c0e88a4

File tree

15 files changed

+483
-616
lines changed

15 files changed

+483
-616
lines changed

src/macaron/provenance/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This package contains the provenance tools for software components."""

src/macaron/repo_finder/provenance_extractor.py renamed to src/macaron/provenance/provenance_extractor.py

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,10 @@
66
import urllib.parse
77

88
from packageurl import PackageURL
9-
from pydriller import Git
109

1110
from macaron.errors import ProvenanceError
1211
from macaron.json_tools import JsonType, json_extract
13-
from macaron.repo_finder.commit_finder import (
14-
AbstractPurlType,
15-
determine_abstract_purl_type,
16-
extract_commit_from_version,
17-
)
12+
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
1813
from macaron.repo_finder.repo_finder import to_domain_from_known_purl_types
1914
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload
2015

@@ -275,27 +270,18 @@ def check_if_input_repo_provenance_conflict(
275270

276271

277272
def check_if_input_purl_provenance_conflict(
278-
git_obj: Git,
279273
repo_path_input: bool,
280-
digest_input: bool,
281274
provenance_repo_url: str | None,
282-
provenance_commit_digest: str | None,
283275
purl: PackageURL,
284276
) -> bool:
285277
"""Test if the input repository type PURL's repo and commit match the contents of the provenance.
286278
287279
Parameters
288280
----------
289-
git_obj: Git
290-
The Git object.
291281
repo_path_input: bool
292282
True if there is a repo as input.
293-
digest_input: str
294-
True if there is a commit as input.
295283
provenance_repo_url: str | None
296284
The repo url from provenance.
297-
provenance_commit_digest: str | None
298-
The commit digest from provenance.
299285
purl: PackageURL
300286
The input repository PURL.
301287
@@ -318,18 +304,6 @@ def check_if_input_purl_provenance_conflict(
318304
)
319305
return True
320306

321-
# Check the PURL commit against the provenance.
322-
if not digest_input and provenance_commit_digest and purl.version:
323-
purl_commit = extract_commit_from_version(git_obj, purl.version)
324-
if purl_commit and purl_commit != provenance_commit_digest:
325-
logger.debug(
326-
"The commit digest passed via purl input does not match what exists in the "
327-
"provenance. Purl Commit: %s, Provenance Commit: %s.",
328-
purl_commit,
329-
provenance_commit_digest,
330-
)
331-
return True
332-
333307
return False
334308

335309

src/macaron/repo_finder/provenance_finder.py renamed to src/macaron/provenance/provenance_finder.py

Lines changed: 45 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -107,38 +107,6 @@ def _find_provenance(self, discovery_functions: list[partial[list[InTotoPayload]
107107
logger.debug("No provenance found.")
108108
return []
109109

110-
def verify_provenance(self, purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
111-
"""Verify the passed provenance.
112-
113-
Parameters
114-
----------
115-
purl: PackageURL
116-
The PURL of the analysis target.
117-
provenance: list[InTotoPayload]
118-
The list of provenance.
119-
120-
Returns
121-
-------
122-
bool
123-
True if the provenance could be verified, or False otherwise.
124-
"""
125-
if determine_abstract_purl_type(purl) == AbstractPurlType.REPOSITORY:
126-
# Do not perform default verification for repository type targets.
127-
return False
128-
129-
verification_function = None
130-
131-
if purl.type == "npm":
132-
verification_function = partial(verify_npm_provenance, purl, provenance)
133-
134-
# TODO other verification functions go here.
135-
136-
if verification_function:
137-
return verification_function()
138-
139-
logger.debug("Provenance verification not supported for PURL type: %s", purl.type)
140-
return False
141-
142110

143111
def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoPayload]:
144112
"""Find and download the NPM based provenance for the passed PURL.
@@ -213,72 +181,6 @@ def find_npm_provenance(purl: PackageURL, registry: NPMRegistry) -> list[InTotoP
213181
return []
214182

215183

216-
def verify_npm_provenance(purl: PackageURL, provenance: list[InTotoPayload]) -> bool:
217-
"""Compare the unsigned payload subject digest with the signed payload digest, if available.
218-
219-
Parameters
220-
----------
221-
purl: PackageURL
222-
The PURL of the analysis target.
223-
provenance: list[InTotoPayload]
224-
The provenances to verify.
225-
226-
Returns
227-
-------
228-
bool
229-
True if the provenance was verified, or False otherwise.
230-
"""
231-
if len(provenance) != 2:
232-
logger.debug("Expected unsigned and signed provenance.")
233-
return False
234-
235-
signed_subjects = provenance[1].statement.get("subject")
236-
if not signed_subjects:
237-
return False
238-
239-
unsigned_subjects = provenance[0].statement.get("subject")
240-
if not unsigned_subjects:
241-
return False
242-
243-
found_signed_subject = None
244-
for signed_subject in signed_subjects:
245-
name = signed_subject.get("name")
246-
if name and name == str(purl):
247-
found_signed_subject = signed_subject
248-
break
249-
250-
if not found_signed_subject:
251-
return False
252-
253-
found_unsigned_subject = None
254-
for unsigned_subject in unsigned_subjects:
255-
name = unsigned_subject.get("name")
256-
if name and name == str(purl):
257-
found_unsigned_subject = unsigned_subject
258-
break
259-
260-
if not found_unsigned_subject:
261-
return False
262-
263-
signed_digest = found_signed_subject.get("digest")
264-
unsigned_digest = found_unsigned_subject.get("digest")
265-
if not (signed_digest and unsigned_digest):
266-
return False
267-
268-
# For signed and unsigned to match, the digests must be identical.
269-
if signed_digest != unsigned_digest:
270-
return False
271-
272-
key = list(signed_digest.keys())[0]
273-
logger.debug(
274-
"Verified provenance against signed companion. Signed: %s, Unsigned: %s.",
275-
signed_digest[key][:7],
276-
unsigned_digest[key][:7],
277-
)
278-
279-
return True
280-
281-
282184
def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[InTotoPayload]:
283185
"""Find and download the GAV based provenance for the passed PURL.
284186
@@ -373,7 +275,9 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
373275
return provenances[:1]
374276

375277

376-
def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
278+
def find_provenance_from_ci(
279+
analyze_ctx: AnalyzeContext, git_obj: Git | None, download_path: str
280+
) -> InTotoPayload | None:
377281
"""Try to find provenance from CI services of the repository.
378282
379283
Note that we stop going through the CI services once we encounter a CI service
@@ -385,9 +289,11 @@ def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) ->
385289
Parameters
386290
----------
387291
analyze_ctx: AnalyzeContext
388-
The contenxt of the ongoing analysis.
292+
The context of the ongoing analysis.
389293
git_obj: Git | None
390294
The Pydriller Git object representing the repository, if any.
295+
download_path: str
296+
The pre-existing location to download discovered files to.
391297
392298
Returns
393299
-------
@@ -463,9 +369,7 @@ def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) ->
463369
ci_info["provenance_assets"].extend(provenance_assets)
464370

465371
# Download the provenance assets and load the provenance payloads.
466-
download_provenances_from_github_actions_ci_service(
467-
ci_info,
468-
)
372+
download_provenances_from_ci_service(ci_info, download_path)
469373

470374
# TODO consider how to handle multiple payloads here.
471375
return ci_info["provenances"][0].payload if ci_info["provenances"] else None
@@ -476,56 +380,60 @@ def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) ->
476380
return None
477381

478382

479-
def download_provenances_from_github_actions_ci_service(ci_info: CIInfo) -> None:
383+
def download_provenances_from_ci_service(ci_info: CIInfo, download_path: str) -> None:
480384
"""Download provenances from GitHub Actions.
481385
482386
Parameters
483387
----------
484388
ci_info: CIInfo,
485389
A ``CIInfo`` instance that holds a GitHub Actions git service object.
390+
download_path: str
391+
The pre-existing location to download discovered files to.
486392
"""
487393
ci_service = ci_info["service"]
488394
prov_assets = ci_info["provenance_assets"]
489-
395+
if not os.path.isdir(download_path):
396+
logger.debug("Download location is not a valid directory.")
397+
return
490398
try:
491-
with tempfile.TemporaryDirectory() as temp_path:
492-
downloaded_provs = []
493-
for prov_asset in prov_assets:
494-
# Check the size before downloading.
495-
if prov_asset.size_in_bytes > defaults.getint(
496-
"slsa.verifier",
497-
"max_download_size",
498-
fallback=1000000,
499-
):
500-
logger.info(
501-
"Skip verifying the provenance %s: asset size too large.",
502-
prov_asset.name,
503-
)
504-
continue
399+
downloaded_provs = []
400+
for prov_asset in prov_assets:
401+
# Check the size before downloading.
402+
if prov_asset.size_in_bytes > defaults.getint(
403+
"slsa.verifier",
404+
"max_download_size",
405+
fallback=1000000,
406+
):
407+
logger.info(
408+
"Skip verifying the provenance %s: asset size too large.",
409+
prov_asset.name,
410+
)
411+
continue
505412

506-
provenance_filepath = os.path.join(temp_path, prov_asset.name)
413+
provenance_filepath = os.path.join(download_path, prov_asset.name)
507414

508-
if not ci_service.api_client.download_asset(
509-
prov_asset.url,
510-
provenance_filepath,
511-
):
512-
logger.debug(
513-
"Could not download the provenance %s. Skip verifying...",
514-
prov_asset.name,
515-
)
516-
continue
415+
if not ci_service.api_client.download_asset(
416+
prov_asset.url,
417+
provenance_filepath,
418+
):
419+
logger.debug(
420+
"Could not download the provenance %s. Skip verifying...",
421+
prov_asset.name,
422+
)
423+
continue
517424

518-
# Read the provenance.
519-
try:
520-
payload = load_provenance_payload(provenance_filepath)
521-
except LoadIntotoAttestationError as error:
522-
logger.error("Error logging provenance: %s", error)
523-
continue
425+
# Read the provenance.
426+
try:
427+
payload = load_provenance_payload(provenance_filepath)
428+
except LoadIntotoAttestationError as error:
429+
logger.error("Error logging provenance: %s", error)
430+
continue
524431

525-
# Add the provenance file.
526-
downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))
432+
# Add the provenance file.
433+
downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))
527434

528435
# Persist the provenance payloads into the CIInfo object.
529436
ci_info["provenances"] = downloaded_provs
437+
530438
except OSError as error:
531439
logger.error("Error while storing provenance in the temporary directory: %s", error)

0 commit comments

Comments
 (0)