Skip to content

Commit f4fd86f

Browse files
authored
chore: refactor provenance available check (#791)
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent ac8de70 commit f4fd86f

40 files changed

+675
-860
lines changed

src/macaron/repo_finder/provenance_extractor.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -243,24 +243,18 @@ def _clean_spdx(uri: str) -> str:
243243
return url
244244

245245

246-
def check_if_input_repo_commit_provenance_conflict(
246+
def check_if_input_repo_provenance_conflict(
247247
repo_path_input: str | None,
248-
digest_input: str | None,
249248
provenance_repo_url: str | None,
250-
provenance_commit_digest: str | None,
251249
) -> bool:
252250
"""Test if the input repo matches the contents of the provenance.
253251
254252
Parameters
255253
----------
256254
repo_path_input: str | None
257255
The repo URL from input.
258-
digest_input: str | None
259-
The digest from input.
260256
provenance_repo_url: str | None
261257
The repo URL from provenance.
262-
provenance_commit_digest: str | None
263-
The commit digest from provenance.
264258
265259
Returns
266260
-------
@@ -277,16 +271,6 @@ def check_if_input_repo_commit_provenance_conflict(
277271
)
278272
return True
279273

280-
# Check the provenance commit against the input commit.
281-
if digest_input and provenance_commit_digest and digest_input != provenance_commit_digest:
282-
logger.debug(
283-
"The commit digest from input does not match what exists in the provenance. "
284-
"Input Commit: %s, Provenance Commit: %s.",
285-
digest_input,
286-
provenance_commit_digest,
287-
)
288-
return True
289-
290274
return False
291275

292276

src/macaron/repo_finder/provenance_finder.py

Lines changed: 178 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,22 @@
88
from functools import partial
99

1010
from packageurl import PackageURL
11+
from pydriller import Git
1112

1213
from macaron.config.defaults import defaults
1314
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
15+
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
1416
from macaron.slsa_analyzer.checks.provenance_available_check import ProvenanceAvailableException
17+
from macaron.slsa_analyzer.ci_service import GitHubActions
18+
from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService
1519
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, JFrogMavenRegistry, NPMRegistry
1620
from macaron.slsa_analyzer.package_registry.npm_registry import NPMAttestationAsset
1721
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload
1822
from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError
1923
from macaron.slsa_analyzer.provenance.loader import load_provenance_payload
24+
from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData
2025
from macaron.slsa_analyzer.provenance.witness import is_witness_provenance_payload, load_witness_verifier_config
26+
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
2127

2228
logger: logging.Logger = logging.getLogger(__name__)
2329

@@ -49,6 +55,8 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
4955
list[InTotoPayload]
5056
The provenance payload, or an empty list if not found.
5157
"""
58+
logger.debug("Seeking provenance of: %s", purl)
59+
5260
if determine_abstract_purl_type(purl) == AbstractPurlType.REPOSITORY:
5361
# Do not perform default discovery for repository type targets.
5462
return []
@@ -331,7 +339,8 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
331339
logger.error(msg)
332340
raise ProvenanceAvailableException(msg)
333341

334-
provenance_filepaths = []
342+
provenances = []
343+
witness_verifier_config = load_witness_verifier_config()
335344
try:
336345
with tempfile.TemporaryDirectory() as temp_dir:
337346
for provenance_asset in provenance_assets:
@@ -342,28 +351,181 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
342351
provenance_asset.name,
343352
)
344353
continue
345-
provenance_filepaths.append(provenance_filepath)
346-
except OSError as error:
347-
logger.error("Error while storing provenance in the temporary directory: %s", error)
348-
349-
provenances = []
350-
witness_verifier_config = load_witness_verifier_config()
351354

352-
for provenance_filepath in provenance_filepaths:
353-
try:
354-
provenance_payload = load_provenance_payload(provenance_filepath)
355-
except LoadIntotoAttestationError as error:
356-
logger.error("Error while loading provenance: %s", error)
357-
continue
355+
try:
356+
provenance_payload = load_provenance_payload(provenance_filepath)
357+
except LoadIntotoAttestationError as load_error:
358+
logger.error("Error while loading provenance: %s", load_error)
359+
continue
358360

359-
if not is_witness_provenance_payload(provenance_payload, witness_verifier_config.predicate_types):
360-
continue
361+
if not is_witness_provenance_payload(provenance_payload, witness_verifier_config.predicate_types):
362+
continue
361363

362-
provenances.append(provenance_payload)
364+
provenances.append(provenance_payload)
365+
except OSError as error:
366+
logger.error("Error while storing provenance in the temporary directory: %s", error)
363367

364368
if not provenances:
365369
logger.debug("No payloads found in provenance files.")
366370
return []
367371

368372
# We assume that there is only one provenance per GAV.
369373
return provenances[:1]
374+
375+
376+
def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
    """Try to find provenance from CI services of the repository.

    Note that we stop going through the CI services once we encounter a CI service
    that does host provenance assets.

    This method also loads the provenance payloads into the ``CIInfo`` object where
    the provenance assets are found.

    Parameters
    ----------
    analyze_ctx: AnalyzeContext
        The context of the ongoing analysis.
    git_obj: Git | None
        The Pydriller Git object representing the repository, if any.

    Returns
    -------
    InTotoPayload | None
        The provenance payload, or None if not found.
    """
    provenance_extensions = defaults.get_list(
        "slsa.verifier",
        "provenance_extensions",
        fallback=["intoto.jsonl"],
    )
    component = analyze_ctx.component
    ci_info_entries = analyze_ctx.dynamic_data["ci_services"]

    if not component.repository:
        logger.debug("Unable to find a provenance because a repository was not found for %s.", component.purl)
        return None

    repo_full_name = component.repository.full_name
    for ci_info in ci_info_entries:
        ci_service = ci_info["service"]

        if isinstance(ci_service, NoneCIService):
            continue

        if isinstance(ci_service, GitHubActions):
            # Find the release for the software component version being analyzed.
            digest = component.repository.commit_sha
            tag = None
            if git_obj:
                # Use the software component commit to find the tag.
                if not digest:
                    logger.debug("Cannot retrieve asset provenance without commit digest.")
                    return None
                tag = _find_tag_for_commit(git_obj, digest)

            # Without a Git object no tag can be resolved, so this also covers that case.
            if not tag:
                logger.debug("Could not find the tag matching commit: %s", digest)
                return None

            # Get the correct release using the tag.
            release_payload = ci_service.api_client.get_release_by_tag(repo_full_name, tag)
            if not release_payload:
                logger.debug("Failed to find release matching tag: %s", tag)
                return None

            # Store the release data for other checks.
            ci_info["release"] = release_payload

            # Get the provenance assets.
            for prov_ext in provenance_extensions:
                provenance_assets = ci_service.api_client.fetch_assets(
                    release_payload,
                    ext=prov_ext,
                )
                if not provenance_assets:
                    continue

                logger.info("Found the following provenance assets:")
                for provenance_asset in provenance_assets:
                    logger.info("* %s", provenance_asset.url)

                # Store the provenance assets for other checks.
                ci_info["provenance_assets"].extend(provenance_assets)

                # Download the provenance assets and load the provenance payloads.
                download_provenances_from_github_actions_ci_service(
                    ci_info,
                )

                # Only the first extension that yields assets is used; the search stops here.
                # TODO consider how to handle multiple payloads here.
                return ci_info["provenances"][0].payload if ci_info["provenances"] else None

        else:
            logger.debug("CI service not supported for provenance finding: %s", ci_service.name)

    return None


def _find_tag_for_commit(git_obj: Git, digest: str) -> str | None:
    """Return the name of the first repository tag whose commit matches the digest.

    Parameters
    ----------
    git_obj: Git
        The Pydriller Git object representing the repository.
    digest: str
        The commit digest to look for.

    Returns
    -------
    str | None
        The string form of the first matching tag, or None if no tag matches.
    """
    for _tag in git_obj.repo.tags:
        try:
            tag_commit = str(_tag.commit)
        except ValueError as error:
            # Tags that do not resolve to a commit are skipped rather than treated as fatal.
            logger.debug("Commit of tag is a blob or tree: %s", error)
            continue
        if tag_commit and tag_commit == digest:
            return str(_tag)
    return None
477+
478+
479+
def download_provenances_from_github_actions_ci_service(ci_info: CIInfo) -> None:
    """Download provenances from GitHub Actions.

    Each asset listed under ``ci_info["provenance_assets"]`` is downloaded into a
    temporary directory and loaded as a provenance payload. Assets that exceed the
    configured maximum download size, fail to download, or fail to load are skipped.
    The successfully loaded payloads are stored under ``ci_info["provenances"]``.

    If an ``OSError`` occurs while working with the temporary directory, the error is
    logged and ``ci_info["provenances"]`` is left unchanged.

    Parameters
    ----------
    ci_info: CIInfo
        A ``CIInfo`` instance that holds a GitHub Actions git service object.
    """
    ci_service = ci_info["service"]
    prov_assets = ci_info["provenance_assets"]

    try:
        with tempfile.TemporaryDirectory() as temp_path:
            downloaded_provs = []
            for prov_asset in prov_assets:
                # Check the size before downloading.
                if prov_asset.size_in_bytes > defaults.getint(
                    "slsa.verifier",
                    "max_download_size",
                    fallback=1000000,
                ):
                    logger.info(
                        "Skip verifying the provenance %s: asset size too large.",
                        prov_asset.name,
                    )
                    continue

                provenance_filepath = os.path.join(temp_path, prov_asset.name)

                if not ci_service.api_client.download_asset(
                    prov_asset.url,
                    provenance_filepath,
                ):
                    logger.debug(
                        "Could not download the provenance %s. Skip verifying...",
                        prov_asset.name,
                    )
                    continue

                # Read the provenance.
                try:
                    payload = load_provenance_payload(provenance_filepath)
                except LoadIntotoAttestationError as error:
                    logger.error("Error while loading provenance: %s", error)
                    continue

                # Add the provenance file.
                downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))

            # Persist the provenance payloads into the CIInfo object.
            # This replaces any previously stored list, even when nothing was loaded.
            ci_info["provenances"] = downloaded_provs
    except OSError as error:
        logger.error("Error while storing provenance in the temporary directory: %s", error)

src/macaron/slsa_analyzer/analyze_context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def provenances(self) -> dict[str, list[InTotoV01Statement | InTotoV1Statement]]
157157
result: dict[str, list[InTotoV01Statement | InTotoV1Statement]] = defaultdict(list)
158158
for ci_info in ci_services:
159159
result[ci_info["service"].name].extend(
160-
prov_asset.payload.statement for prov_asset in ci_info["provenances"]
160+
provenance.payload.statement for provenance in ci_info["provenances"]
161161
)
162162
package_registry_entries = self.dynamic_data["package_registries"]
163163
for package_registry_entry in package_registry_entries:

0 commit comments

Comments
 (0)