Full-fledged processing of protection profiles #466

Closed
Changes from 1 commit
4 changes: 4 additions & 0 deletions src/sec_certs/configuration.py
@@ -54,6 +54,10 @@ class Configuration(BaseSettings):
"https://sec-certs.org/cc/cc.tar.gz",
description="URL from where to fetch the latest full archive of fully processed CC dataset.",
)
pp_latest_full_archive: AnyHttpUrl = Field(
"https://sec-certs.org/cc/pp.tar.gz",
description="URL from where to fetch the latest full archive of fully processed PP dataset.",
)
Member:
The pp_latest_snapshot config also needs to change. It will no longer live in the /static/ subdirectory but will have the same layout as the CC and FIPS datasets. Could you make the change, please?

Collaborator Author:
Sure, I wanted to discuss this first before changing it.
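A minimal sketch of what the updated field could look like, assuming pp_latest_snapshot adopts the same URL layout as the CC and FIPS datasets. The URL and description are assumptions rather than part of this diff, and the field would slot into the Configuration class shown above:

```python
pp_latest_snapshot: AnyHttpUrl = Field(
    "https://sec-certs.org/pp/pp.json",  # hypothetical URL; the final layout is what this thread is deciding
    description="URL from where to fetch the latest snapshot of the fully processed PP dataset.",
)
```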

cc_maintenances_latest_snapshot: AnyHttpUrl = Field(
"https://sec-certs.org/cc/maintenance_updates.json",
description="URL from where to fetch the latest snapshot of CC maintenance updates",
38 changes: 38 additions & 0 deletions src/sec_certs/constants.py
@@ -27,6 +27,44 @@

CPE_VERSION_NA = "-"

CC_CAT_ABBREVIATIONS = [
"AC",
"BD",
"BP",
"DP",
"DB",
"DD",
"IC",
"KM",
"MD",
"MF",
"NS",
"OS",
"OD",
"DG",
"TC",
]

CC_CATEGORIES = [
"Access Control Devices and Systems",
"Biometric Systems and Devices",
"Boundary Protection Devices and Systems",
"Data Protection",
"Databases",
"Detection Devices and Systems",
"ICs, Smart Cards and Smart Card-Related Devices and Systems",
"Key Management Systems",
"Mobility",
"Multi-Function Devices",
"Network and Network-Related Devices and Systems",
"Operating Systems",
"Other Devices and Systems",
"Products for Digital Signatures",
"Trusted Computing",
]

CC_PORTAL_BASE_URL = "https://www.commoncriteriaportal.org"

RELEASE_CANDIDATE_REGEX: re.Pattern = re.compile(r"rc\d{0,2}$", re.IGNORECASE)

FIPS_BASE_URL = "https://csrc.nist.gov"
69 changes: 45 additions & 24 deletions src/sec_certs/dataset/auxiliary_dataset_handling.py
@@ -6,15 +6,14 @@
from abc import ABC, abstractmethod
from collections.abc import Iterable
from pathlib import Path
from typing import Any
from typing import Any, ClassVar

from sec_certs import constants
from sec_certs.configuration import config
from sec_certs.dataset.cc_scheme import CCSchemeDataset
from sec_certs.dataset.cpe import CPEDataset
from sec_certs.dataset.cve import CVEDataset
from sec_certs.dataset.fips_algorithm import FIPSAlgorithmDataset
from sec_certs.dataset.protection_profile import ProtectionProfileDataset
from sec_certs.sample.cc import CCCertificate
from sec_certs.sample.cc_maintenance_update import CCMaintenanceUpdate
from sec_certs.utils import helpers
@@ -25,17 +24,25 @@


class AuxiliaryDatasetHandler(ABC):
def __init__(self, root_dir: str | Path) -> None:
self.root_dir = Path(root_dir)
RELATIVE_DIR: ClassVar[str | None] = None

def __init__(self, aux_datasets_dir: str | Path) -> None:
self.aux_datasets_dir = Path(aux_datasets_dir)
self.dset: Any

@property
def root_dir(self) -> Path:
if self.RELATIVE_DIR:
return self.aux_datasets_dir / Path(self.RELATIVE_DIR)
return self.aux_datasets_dir

@property
@abstractmethod
def dset_path(self) -> Path:
raise NotImplementedError("Not meant to be implemented by base class")

def set_local_paths(self, new_root_dir: str | Path) -> None:
self.root_dir = Path(new_root_dir)
def set_local_paths(self, aux_datasets_dir: str | Path) -> None:
self.aux_datasets_dir = Path(aux_datasets_dir)

def process_dataset(self, download_fresh: bool = False) -> None:
self.root_dir.mkdir(parents=True, exist_ok=True)
@@ -43,94 +50,94 @@

@abstractmethod
def load_dataset(self) -> None:
raise NotImplementedError("Not meant to be implemented by base class")

@abstractmethod
def _process_dataset_body(self, download_fresh: bool = False) -> None:
raise NotImplementedError("Not meant to be implemented by base class")


class CPEDatasetHandler(AuxiliaryDatasetHandler):
@property
def dset_path(self) -> Path:
return self.root_dir / "cpe_dataset.json"

@staged(logger, "Processing CPE dataset")
def _process_dataset_body(self, download_fresh: bool = False) -> None:
if self.dset_path.exists():
logger.info("Preparing CPEDataset from json.")
self.dset = CPEDataset.from_json(self.dset_path)

else:
self.dset = CPEDataset(json_path=self.dset_path)
download_fresh = True

if download_fresh:
if config.preferred_source_nvd_datasets == "api":
logger.info("Fetching new CPE records from NVD API")
with CpeNvdDatasetBuilder(api_key=config.nvd_api_key) as builder:
self.dset = builder.build_dataset(self.dset)

else:
logger.info("Preparing CPEDataset from sec-certs.org.")
self.dset = CPEDataset.from_web(self.dset_path)
self.dset.to_json()

def load_dataset(self) -> None:
self.dset = CPEDataset.from_json(self.dset_path)


class CVEDatasetHandler(AuxiliaryDatasetHandler):
@property
def dset_path(self) -> Path:
return self.root_dir / "cve_dataset.json"

@staged(logger, "Processing CVE dataset")
def _process_dataset_body(self, download_fresh: bool = False) -> None:
if self.dset_path.exists():
logger.info("Preparing CVEDataset from json.")
self.dset = CVEDataset.from_json(self.dset_path)

else:
self.dset = CVEDataset(json_path=self.dset_path)
download_fresh = True

if download_fresh:
if config.preferred_source_nvd_datasets == "api":
logger.info("Fetching new CVE records from NVD API.")
with CveNvdDatasetBuilder(api_key=config.nvd_api_key) as builder:
self.dset = builder.build_dataset(self.dset)

else:
logger.info("Preparing CVEDataset from sec-certs.org")
self.dset = CVEDataset.from_web(self.dset_path)
self.dset.to_json()

def load_dataset(self):
self.dset = CVEDataset.from_json(self.dset_path)


class CPEMatchDictHandler(AuxiliaryDatasetHandler):
@property
def dset_path(self) -> Path:
return self.root_dir / "cpe_match.json"

@staged(logger, "Processing CPE Match dictionary")
def _process_dataset_body(self, download_fresh: bool = False) -> None:
if self.dset_path.exists():
logger.info("Preparing CPE Match feed from json.")
with self.dset_path.open("r") as handle:
self.dset = json.load(handle)

else:
self.dset = CpeMatchNvdDatasetBuilder._init_new_dataset()
download_fresh = True

if download_fresh:
if config.preferred_source_nvd_datasets == "api":
logger.info("Fetchnig CPE Match feed from NVD APi.")
with CpeMatchNvdDatasetBuilder(api_key=config.nvd_api_key) as builder:
self.dset = builder.build_dataset(self.dset)

else:
logger.info("Preparing CPE Match feed from sec-certs.org.")
with tempfile.TemporaryDirectory() as tmp_dir:
dset_path = Path(tmp_dir) / "cpe_match_feed.json.gz"
if (
not helpers.download_file(
config.cpe_match_latest_snapshot,
dset_path,
@@ -138,40 +145,44 @@
)
== constants.RESPONSE_OK
):
raise RuntimeError(
f"Could not download CPE Match feed from {config.cpe_match_latest_snapshot}."
)
with gzip.open(str(dset_path)) as handle:
json_str = handle.read().decode("utf-8")
self.dset = json.loads(json_str)
with self.dset_path.open("w") as handle:
json.dump(self.dset, handle, indent=4)

def load_dataset(self):
with self.dset_path.open("r") as handle:
self.dset = json.load(handle)


class FIPSAlgorithmDatasetHandler(AuxiliaryDatasetHandler):
@property
def dset_path(self) -> Path:
return self.root_dir / "algorithms.json"

@staged(logger, "Processing FIPS Algorithms")
def _process_dataset_body(self, download_fresh: bool = False) -> None:
if not self.dset_path.exists() or download_fresh:
self.dset = FIPSAlgorithmDataset.from_web(self.dset_path)
self.dset.to_json()

else:
self.dset = FIPSAlgorithmDataset.from_json(self.dset_path)

def load_dataset(self):
self.dset = FIPSAlgorithmDataset.from_json(self.dset_path)


class CCSchemeDatasetHandler(AuxiliaryDatasetHandler):
def __init__(self, root_dir: str | Path = constants.DUMMY_NONEXISTING_PATH, only_schemes: set[str] | None = None):
self.root_dir = Path(root_dir)
def __init__(
self,
aux_datasets_dir: str | Path = constants.DUMMY_NONEXISTING_PATH,
only_schemes: set[str] | None = None,
):
self.aux_datasets_dir = Path(aux_datasets_dir)
self.only_schemes = only_schemes
self.dset: Any

@@ -185,68 +196,78 @@
self.dset = CCSchemeDataset.from_web(self.dset_path, self.only_schemes)
self.dset.to_json()
else:
self.dset = CCSchemeDataset.from_json(self.dset_path)

def load_dataset(self):
self.dset = CCSchemeDataset.from_json(self.dset_path)


class CCMaintenanceUpdateDatasetHandler(AuxiliaryDatasetHandler):
RELATIVE_DIR: ClassVar[str] = "maintenances"

def __init__(
self, root_dir: str | Path = constants.DUMMY_NONEXISTING_PATH, certs_with_updates: Iterable[CCCertificate] = []
self,
aux_datasets_dir: str | Path = constants.DUMMY_NONEXISTING_PATH,
certs_with_updates: Iterable[CCCertificate] = [],
) -> None:
self.root_dir = Path(root_dir)
self.aux_datasets_dir = Path(aux_datasets_dir)
self.certs_with_updates = certs_with_updates
self.dset: Any

@property
def dset_path(self) -> Path:
return self.root_dir / "maintenances"

@property
def _dset_json_path(self) -> Path:
return self.dset_path / "maintenance_updates.json"
return self.root_dir / "maintenance_updates.json"

def load_dataset(self) -> None:
from sec_certs.dataset.cc import CCDatasetMaintenanceUpdates

self.dset = CCDatasetMaintenanceUpdates.from_json(self._dset_json_path)
self.dset = CCDatasetMaintenanceUpdates.from_json(self.dset_path)

@staged(logger, "Processing CC Maintenance updates")
def _process_dataset_body(self, download_fresh: bool = False):
from sec_certs.dataset.cc import CCDatasetMaintenanceUpdates

if not self.dset_path.exists() or download_fresh:
updates = list(
itertools.chain.from_iterable(
CCMaintenanceUpdate.get_updates_from_cc_cert(x) for x in self.certs_with_updates
)
)
self.dset = CCDatasetMaintenanceUpdates(
{x.dgst: x for x in updates}, root_dir=self.dset_path, name="maintenance_updates"
{x.dgst: x for x in updates}, root_dir=self.dset_path.parent, name="maintenance_updates"
)
else:
self.dset = CCDatasetMaintenanceUpdates.from_json(self._dset_json_path)

if not self.dset.state.artifacts_downloaded:
self.dset.download_all_artifacts()
if not self.dset.state.pdfs_converted:
self.dset.convert_all_pdfs()
if not self.dset.state.certs_analyzed:
self.dset.extract_data()

else:
self.dset = CCDatasetMaintenanceUpdates.from_json(self.dset_path)


class ProtectionProfileDatasetHandler(AuxiliaryDatasetHandler):
RELATIVE_DIR: ClassVar[str] = "protection_profiles"

def __init__(self, aux_datasets_dir: str | Path = constants.DUMMY_NONEXISTING_PATH):
self.aux_datasets_dir = Path(aux_datasets_dir)

@property
def dset_path(self) -> Path:
return self.root_dir / "pp.json"

def load_dataset(self) -> None:
from sec_certs.dataset.protection_profile import ProtectionProfileDataset

self.dset = ProtectionProfileDataset.from_json(self.dset_path)

@staged(logger, "Processing Protection profiles")
def _process_dataset_body(self, download_fresh: bool = False):
from sec_certs.dataset.protection_profile import ProtectionProfileDataset

if not self.dset_path.exists() or download_fresh:
self.dset = ProtectionProfileDataset.from_web(self.dset_path)
self.dset_path.parent.mkdir(exist_ok=True, parents=True)
self.dset = ProtectionProfileDataset(root_dir=self.dset_path.parent)
self.dset.get_certs_from_web()
self.dset.download_all_artifacts()
self.dset.convert_all_pdfs()
self.dset.analyze_certificates()

else:
self.dset = ProtectionProfileDataset.from_json(self.dset_path)
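For orientation (not part of the diff): a small usage sketch of the new aux_datasets_dir / RELATIVE_DIR layout defined above. The class, property, and file names come from the handler code in this file; the directory path is illustrative.

```python
from pathlib import Path

from sec_certs.dataset.auxiliary_dataset_handling import ProtectionProfileDatasetHandler

# Each handler now receives the shared auxiliary-datasets directory and derives
# its own subdirectory from the class-level RELATIVE_DIR ("protection_profiles").
handler = ProtectionProfileDatasetHandler(Path("auxiliary_datasets"))
print(handler.root_dir)   # auxiliary_datasets/protection_profiles
print(handler.dset_path)  # auxiliary_datasets/protection_profiles/pp.json
```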
102 changes: 46 additions & 56 deletions src/sec_certs/dataset/cc.py
@@ -12,7 +12,6 @@
import pandas as pd
from bs4 import BeautifulSoup, Tag

import sec_certs.utils.sanitization
from sec_certs import constants
from sec_certs.configuration import config
from sec_certs.dataset.auxiliary_dataset_handling import (
@@ -27,6 +26,7 @@
from sec_certs.dataset.dataset import Dataset, logger
from sec_certs.heuristics.cc import (
compute_cert_labs,
compute_eals,
compute_normalized_cert_ids,
compute_references,
compute_sars,
@@ -36,7 +36,6 @@
from sec_certs.heuristics.common import compute_cpe_heuristics, compute_related_cves, compute_transitive_vulnerabilities
from sec_certs.sample.cc import CCCertificate
from sec_certs.sample.cc_maintenance_update import CCMaintenanceUpdate
from sec_certs.sample.protection_profile import ProtectionProfile
from sec_certs.serialization.json import ComplexSerializableType, serialize
from sec_certs.utils import helpers, sanitization
from sec_certs.utils import parallel_processing as cert_processing
@@ -73,10 +72,10 @@
self.aux_handlers[CPEMatchDictHandler] = CPEMatchDictHandler(self.auxiliary_datasets_dir)
self.aux_handlers[CCSchemeDatasetHandler] = CCSchemeDatasetHandler(self.auxiliary_datasets_dir)
self.aux_handlers[ProtectionProfileDatasetHandler] = ProtectionProfileDatasetHandler(
self.auxiliary_datasets_dir
self.auxiliary_datasets_dir / "protection_profiles"
)
self.aux_handlers[CCMaintenanceUpdateDatasetHandler] = CCMaintenanceUpdateDatasetHandler(
self.auxiliary_datasets_dir
self.auxiliary_datasets_dir / "maintenances"
)

def to_pandas(self) -> pd.DataFrame:
@@ -273,12 +272,34 @@
self.certificates_txt_dir,
)

def process_auxiliary_datasets(self, download_fresh: bool = False) -> None:
self.aux_handlers[CCMaintenanceUpdateDatasetHandler].certs_with_updates = [ # type: ignore
x for x in self if x.maintenance_updates
]
self.aux_handlers[CCSchemeDatasetHandler].only_schemes = {x.scheme for x in self} # type: ignore
super().process_auxiliary_datasets(download_fresh)
def process_auxiliary_datasets(
self,
download_fresh: bool = False,
processed_pp_dataset_root_dir: Path | None = None,
skip_schemes: bool = False,
**kwargs,
) -> None:
if CCMaintenanceUpdateDatasetHandler in self.aux_handlers:
self.aux_handlers[CCMaintenanceUpdateDatasetHandler].certs_with_updates = [ # type: ignore

x for x in self if x.maintenance_updates
]
if CCSchemeDatasetHandler in self.aux_handlers:
self.aux_handlers[CCSchemeDatasetHandler].only_schemes = {x.scheme for x in self} # type: ignore

if processed_pp_dataset_root_dir:
if self.aux_handlers[ProtectionProfileDatasetHandler].root_dir.exists():
logger.warning(
f"Overwriting PP Dataset at {self.aux_handlers[ProtectionProfileDatasetHandler].root_dir} with dataset from {processed_pp_dataset_root_dir}."
)
shutil.copytree(
processed_pp_dataset_root_dir,
self.aux_handlers[ProtectionProfileDatasetHandler].root_dir,
dirs_exist_ok=True,
)

if skip_schemes:
del self.aux_handlers[CCSchemeDatasetHandler]
super().process_auxiliary_datasets(download_fresh, **kwargs)

def _merge_certs(self, certs: dict[str, CCCertificate], cert_source: str | None = None) -> None:
"""
@@ -447,13 +468,6 @@
df_base = df_base.drop_duplicates(subset=["dgst"])
df_main = df_main.drop_duplicates()

profiles = {
x.dgst: {
ProtectionProfile(pp_name=y, pp_eal=None)
for y in sec_certs.utils.sanitization.sanitize_protection_profiles(x.protection_profiles)
}
for x in df_base.itertuples()
}
updates: dict[str, set] = {x.dgst: set() for x in df_base.itertuples()}
for x in df_main.itertuples():
updates[x.dgst].add(
@@ -479,7 +493,7 @@
x.st_link,
None,
None,
profiles.get(x.dgst, None),
None,
updates.get(x.dgst, None),
None,
None,
@@ -524,9 +538,9 @@
) -> dict[str, CCCertificate]:
tables = soup.find_all("table", id=table_id)

if not len(tables) <= 1:
if len(tables) > 1:
raise ValueError(
f'The "{file.name}" was expected to contain <1 <table> element. Instead, it contains: {len(tables)} <table> elements.'
f'The "{file.name}" was expected to contain 0-1 <table> element. Instead, it contains: {len(tables)} <table> elements.'
)

if not tables:
@@ -555,40 +569,8 @@

cert_status = "active" if "active" in str(file) else "archived"

cc_cat_abbreviations = [
"AC",
"BP",
"DP",
"DB",
"DD",
"IC",
"KM",
"MD",
"MF",
"NS",
"OS",
"OD",
"DG",
"TC",
]
cc_table_ids = ["tbl" + x for x in cc_cat_abbreviations]
cc_categories = [
"Access Control Devices and Systems",
"Boundary Protection Devices and Systems",
"Data Protection",
"Databases",
"Detection Devices and Systems",
"ICs, Smart Cards and Smart Card-Related Devices and Systems",
"Key Management Systems",
"Mobility",
"Multi-Function Devices",
"Network and Network-Related Devices and Systems",
"Operating Systems",
"Other Devices and Systems",
"Products for Digital Signatures",
"Trusted Computing",
]
cat_dict = dict(zip(cc_table_ids, cc_categories))
cc_table_ids = ["tbl" + x for x in constants.CC_CAT_ABBREVIATIONS]
cat_dict = dict(zip(cc_table_ids, constants.CC_CATEGORIES))

with file.open("r") as handle:
soup = BeautifulSoup(handle, "html5lib")
@@ -807,7 +789,7 @@
self._extract_pdf_keywords()

def _compute_heuristics_body(self, skip_schemes: bool = False) -> None:
link_to_protection_profiles(self.aux_handlers[ProtectionProfileDatasetHandler].dset, self.certs.values())
link_to_protection_profiles(self.certs.values(), self.aux_handlers[ProtectionProfileDatasetHandler].dset)
compute_cpe_heuristics(self.aux_handlers[CPEDatasetHandler].dset, self.certs.values())
compute_related_cves(
self.aux_handlers[CPEDatasetHandler].dset,
@@ -820,9 +802,10 @@
compute_transitive_vulnerabilities(self.certs)

if not skip_schemes:
compute_scheme_data(self.aux_handlers[CCSchemeDatasetHandler].dset, self.certs)

compute_cert_labs(self.certs.values())
compute_eals(self.certs.values(), self.aux_handlers[ProtectionProfileDatasetHandler].dset)
compute_sars(self.certs.values())


Expand All @@ -842,6 +825,7 @@
state: CCDataset.DatasetInternalState | None = None,
):
super().__init__(certs, root_dir, name, description, state) # type: ignore
self.aux_handlers = {}
self.state.meta_sources_parsed = True

@property
Expand All @@ -857,7 +841,13 @@
def compute_related_cves(self) -> None:
raise NotImplementedError

def process_auxiliary_datasets(self, download_fresh: bool = False) -> None:
def process_auxiliary_datasets(
self,
download_fresh: bool = False,
processed_pp_dataset_root_dir: Path | None = None,
skip_schemes: bool = False,
**kwargs,
) -> None:
raise NotImplementedError

def analyze_certificates(self) -> None:
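Putting the pieces together, a hedged end-to-end sketch of how the new protection-profile handling could be driven from a CCDataset. The method names and keyword arguments follow this diff; the constructor arguments and paths are assumptions.

```python
from pathlib import Path

from sec_certs.dataset.cc import CCDataset

dset = CCDataset(root_dir=Path("./cc_dset"))  # constructor arguments are assumed
dset.get_certs_from_web()

# Reuse an already processed PP dataset instead of rebuilding it, and skip the
# scheme handler entirely; both keyword arguments come from the new signature above.
dset.process_auxiliary_datasets(
    download_fresh=False,
    processed_pp_dataset_root_dir=Path("./pp_dset"),
    skip_schemes=True,
)
```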