8
8
from functools import partial
9
9
10
10
from packageurl import PackageURL
11
+ from pydriller import Git
11
12
12
13
from macaron .config .defaults import defaults
13
14
from macaron .repo_finder .commit_finder import AbstractPurlType , determine_abstract_purl_type
15
+ from macaron .slsa_analyzer .analyze_context import AnalyzeContext
14
16
from macaron .slsa_analyzer .checks .provenance_available_check import ProvenanceAvailableException
17
+ from macaron .slsa_analyzer .ci_service import GitHubActions
18
+ from macaron .slsa_analyzer .ci_service .base_ci_service import NoneCIService
15
19
from macaron .slsa_analyzer .package_registry import PACKAGE_REGISTRIES , JFrogMavenRegistry , NPMRegistry
16
20
from macaron .slsa_analyzer .package_registry .npm_registry import NPMAttestationAsset
17
21
from macaron .slsa_analyzer .provenance .intoto import InTotoPayload
18
22
from macaron .slsa_analyzer .provenance .intoto .errors import LoadIntotoAttestationError
19
23
from macaron .slsa_analyzer .provenance .loader import load_provenance_payload
24
+ from macaron .slsa_analyzer .provenance .slsa import SLSAProvenanceData
20
25
from macaron .slsa_analyzer .provenance .witness import is_witness_provenance_payload , load_witness_verifier_config
26
+ from macaron .slsa_analyzer .specs .ci_spec import CIInfo
21
27
22
28
logger : logging .Logger = logging .getLogger (__name__ )
23
29
@@ -49,6 +55,8 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
49
55
list[InTotoPayload]
50
56
The provenance payload, or an empty list if not found.
51
57
"""
58
+ logger .debug ("Seeking provenance of: %s" , purl )
59
+
52
60
if determine_abstract_purl_type (purl ) == AbstractPurlType .REPOSITORY :
53
61
# Do not perform default discovery for repository type targets.
54
62
return []
@@ -331,7 +339,8 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
331
339
logger .error (msg )
332
340
raise ProvenanceAvailableException (msg )
333
341
334
- provenance_filepaths = []
342
+ provenances = []
343
+ witness_verifier_config = load_witness_verifier_config ()
335
344
try :
336
345
with tempfile .TemporaryDirectory () as temp_dir :
337
346
for provenance_asset in provenance_assets :
@@ -342,28 +351,181 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
342
351
provenance_asset .name ,
343
352
)
344
353
continue
345
- provenance_filepaths .append (provenance_filepath )
346
- except OSError as error :
347
- logger .error ("Error while storing provenance in the temporary directory: %s" , error )
348
-
349
- provenances = []
350
- witness_verifier_config = load_witness_verifier_config ()
351
354
352
- for provenance_filepath in provenance_filepaths :
353
- try :
354
- provenance_payload = load_provenance_payload (provenance_filepath )
355
- except LoadIntotoAttestationError as error :
356
- logger .error ("Error while loading provenance: %s" , error )
357
- continue
355
+ try :
356
+ provenance_payload = load_provenance_payload (provenance_filepath )
357
+ except LoadIntotoAttestationError as load_error :
358
+ logger .error ("Error while loading provenance: %s" , load_error )
359
+ continue
358
360
359
- if not is_witness_provenance_payload (provenance_payload , witness_verifier_config .predicate_types ):
360
- continue
361
+ if not is_witness_provenance_payload (provenance_payload , witness_verifier_config .predicate_types ):
362
+ continue
361
363
362
- provenances .append (provenance_payload )
364
+ provenances .append (provenance_payload )
365
+ except OSError as error :
366
+ logger .error ("Error while storing provenance in the temporary directory: %s" , error )
363
367
364
368
if not provenances :
365
369
logger .debug ("No payloads found in provenance files." )
366
370
return []
367
371
368
372
# We assume that there is only one provenance per GAV.
369
373
return provenances [:1 ]
374
+
375
+
376
def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
    """Search the repository's CI services for a provenance payload.

    The CI services recorded in the analysis context are visited in order. The search
    ends at the first CI service whose release hosts provenance assets: those assets are
    stored in the matching ``CIInfo`` entry, downloaded, and loaded into that same entry.

    Parameters
    ----------
    analyze_ctx: AnalyzeContext
        The context of the ongoing analysis.
    git_obj: Git | None
        The Pydriller Git object representing the repository, if any.

    Returns
    -------
    InTotoPayload | None
        The payload of the first loaded provenance, or None if not found.
    """
    extensions = defaults.get_list(
        "slsa.verifier",
        "provenance_extensions",
        fallback=["intoto.jsonl"],
    )
    component = analyze_ctx.component
    ci_entries = analyze_ctx.dynamic_data["ci_services"]

    if not component.repository:
        logger.debug("Unable to find a provenance because a repository was not found for %s.", component.purl)
        return None

    repo_full_name = component.repository.full_name
    for entry in ci_entries:
        service = entry["service"]

        if isinstance(service, NoneCIService):
            continue

        if not isinstance(service, GitHubActions):
            logger.debug("CI service not supported for provenance finding: %s", service.name)
            continue

        # Locate the release that corresponds to the analyzed component version.
        commit_sha = component.repository.commit_sha
        release_tag = None
        if git_obj:
            # The tag is resolved by matching tag commits against the component commit.
            if not commit_sha:
                logger.debug("Cannot retrieve asset provenance without commit digest.")
                return None
            for candidate in git_obj.repo.tags:
                try:
                    candidate_sha = str(candidate.commit)
                except ValueError as error:
                    logger.debug("Commit of tag is a blob or tree: %s", error)
                    continue
                if candidate_sha and candidate_sha == commit_sha:
                    release_tag = str(candidate)
                    break

        if not release_tag:
            logger.debug("Could not find the tag matching commit: %s", commit_sha)
            return None

        # Resolve the release through its tag.
        release = service.api_client.get_release_by_tag(repo_full_name, release_tag)
        if not release:
            logger.debug("Failed to find release matching tag: %s", release_tag)
            return None

        # Keep the release data available for other checks.
        entry["release"] = release

        # Try each configured extension until one yields provenance assets.
        for extension in extensions:
            assets = service.api_client.fetch_assets(release, ext=extension)
            if not assets:
                continue

            logger.info("Found the following provenance assets:")
            for asset in assets:
                logger.info("* %s", asset.url)

            # Keep the provenance assets available for other checks.
            entry["provenance_assets"].extend(assets)

            # Download the assets and load their payloads into the CIInfo entry.
            download_provenances_from_github_actions_ci_service(entry)

            # TODO consider how to handle multiple payloads here.
            loaded = entry["provenances"]
            if loaded:
                return loaded[0].payload
            return None

    return None
477
+
478
+
479
def download_provenances_from_github_actions_ci_service(ci_info: CIInfo) -> None:
    """Download provenances from GitHub Actions.

    Each asset in ``ci_info["provenance_assets"]`` is downloaded into a temporary
    directory and loaded as a provenance payload. The successfully loaded payloads
    replace ``ci_info["provenances"]``.

    An asset is skipped when it is larger than the configured ``max_download_size``,
    when its download fails, or when its content cannot be loaded as a provenance.
    An ``OSError`` raised while working with the temporary directory is logged and
    not propagated.

    Parameters
    ----------
    ci_info: CIInfo
        A ``CIInfo`` instance that holds a GitHub Actions git service object.
    """
    ci_service = ci_info["service"]
    prov_assets = ci_info["provenance_assets"]

    # The size limit is the same for every asset, so it is read only once.
    max_download_size = defaults.getint(
        "slsa.verifier",
        "max_download_size",
        fallback=1000000,
    )

    try:
        with tempfile.TemporaryDirectory() as temp_path:
            downloaded_provs = []
            for prov_asset in prov_assets:
                # Check the size before downloading.
                if prov_asset.size_in_bytes > max_download_size:
                    logger.info(
                        "Skip verifying the provenance %s: asset size too large.",
                        prov_asset.name,
                    )
                    continue

                provenance_filepath = os.path.join(temp_path, prov_asset.name)

                if not ci_service.api_client.download_asset(
                    prov_asset.url,
                    provenance_filepath,
                ):
                    logger.debug(
                        "Could not download the provenance %s. Skip verifying...",
                        prov_asset.name,
                    )
                    continue

                # Read the provenance.
                try:
                    payload = load_provenance_payload(provenance_filepath)
                except LoadIntotoAttestationError as error:
                    logger.error("Error while loading provenance: %s", error)
                    continue

                # Add the provenance file.
                downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))

            # Persist the provenance payloads into the CIInfo object.
            ci_info["provenances"] = downloaded_provs
    except OSError as error:
        logger.error("Error while storing provenance in the temporary directory: %s", error)
0 commit comments