Skip to content

Commit dc04491

Browse files
committed
refactor(core): improve logic, error handling, and apply minor fixes
Signed-off-by: Amine <[email protected]>
1 parent 614cb45 commit dc04491

File tree

5 files changed

+63
-61
lines changed

5 files changed

+63
-61
lines changed

.DS_Store

-6 KB
Binary file not shown.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -181,3 +181,4 @@ docs/_build
181181
bin/
182182
requirements.txt
183183
.macaron_env_file
184+
**/.DS_Store

src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py

Lines changed: 45 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
from macaron.config.defaults import defaults
99
from macaron.config.global_config import global_config
10+
from macaron.errors import HeuristicAnalyzerValueError
1011
from macaron.json_tools import JsonType
1112
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
1213
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
@@ -58,44 +59,64 @@ class TyposquattingPresenceAnalyzer(BaseHeuristicAnalyzer):
5859
"m": (3, 6),
5960
}
6061

61-
def __init__(self) -> None:
62+
def __init__(self, popular_packages_path: str | None = None) -> None:
6263
super().__init__(
6364
name="typosquatting_presence_analyzer", heuristic=Heuristics.TYPOSQUATTING_PRESENCE, depends_on=None
6465
)
65-
self.popular_packages_path, self.distance_ratio_threshold, self.keyboard, self.scaling, self.cost = (
66+
self.default_path = os.path.join(global_config.resources_path, "popular_packages.txt")
67+
if popular_packages_path:
68+
self.default_path = popular_packages_path
69+
self.popular_packages, self.distance_ratio_threshold, self.keyboard, self.scaling, self.cost = (
6670
self._load_defaults()
6771
)
6872

69-
def _load_defaults(self) -> tuple[str, float, float, float, float]:
73+
def _load_defaults(self) -> tuple[list[str], float, float, float, float]:
7074
"""Load default settings from defaults.ini.
7175
7276
Returns
7377
-------
74-
tuple[str, float, float, float, float]:
75-
The path to the popular packages file, distance ratio threshold,
78+
tuple[list[str], float, float, float, float]:
79+
The popular packages list, distance ratio threshold,
7680
keyboard awareness factor, scaling factor, and cost factor.
7781
"""
7882
section_name = "heuristic.pypi"
79-
default_path = os.path.join(global_config.resources_path, "popular_packages.txt")
83+
path = self.default_path
84+
distance_ratio_threshold = 0.95
85+
keyboard = 0.8
86+
scaling = 0.15
87+
cost = 1.0
88+
8089
if defaults.has_section(section_name):
8190
section = defaults[section_name]
82-
path = section.get("popular_packages_path", default_path)
91+
path_from_config = section.get("popular_packages_path", self.default_path)
8392
# Fall back to default if the path in defaults.ini is empty.
84-
if not path.strip():
85-
path = default_path
86-
return (
87-
path,
88-
section.getfloat("distance_ratio_threshold", 0.95),
89-
section.getfloat("keyboard", 0.8),
90-
section.getfloat("scaling", 0.15),
91-
section.getfloat("cost", 1.0),
92-
)
93+
if path_from_config.strip():
94+
path = path_from_config
95+
distance_ratio_threshold = section.getfloat("distance_ratio_threshold", 0.95)
96+
keyboard = section.getfloat("keyboard", 0.8)
97+
scaling = section.getfloat("scaling", 0.15)
98+
cost = section.getfloat("cost", 1.0)
99+
100+
if not path or not os.path.exists(path):
101+
err_msg = "Popular packages file not found or path not configured"
102+
logger.debug(err_msg)
103+
raise HeuristicAnalyzerValueError(err_msg)
104+
105+
popular_packages_list = []
106+
try:
107+
with open(path, encoding="utf-8") as file:
108+
popular_packages_list = file.read().splitlines()
109+
except OSError as error:
110+
err_msg = "Could not read popular packages file"
111+
logger.debug(err_msg)
112+
raise HeuristicAnalyzerValueError(err_msg) from error
113+
93114
return (
94-
default_path,
95-
0.95,
96-
0.8,
97-
0.15,
98-
1.0,
115+
popular_packages_list,
116+
distance_ratio_threshold,
117+
keyboard,
118+
scaling,
119+
cost,
99120
)
100121

101122
def are_neighbors(self, first_char: str, second_char: str) -> bool:
@@ -244,27 +265,13 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
244265
tuple[HeuristicResult, dict[str, JsonType]]:
245266
The result and related information collected during the analysis.
246267
"""
247-
if not self.popular_packages_path or not os.path.exists(self.popular_packages_path):
248-
err_msg = f"Popular packages file not found or path not configured: {self.popular_packages_path}"
249-
logger.warning("%s. Skipping typosquatting check.", err_msg)
250-
return HeuristicResult.SKIP, {"error": err_msg}
251-
252-
popular_packages = []
253-
try:
254-
with open(self.popular_packages_path, encoding="utf-8") as file:
255-
popular_packages = file.read().splitlines()
256-
except OSError as exception:
257-
err_msg = f"Could not read popular packages file {self.popular_packages_path}: {exception}"
258-
logger.error(err_msg)
259-
return HeuristicResult.SKIP, {"error": err_msg}
260-
261-
if not popular_packages:
262-
err_msg = f"Popular packages file is empty: {self.popular_packages_path}"
268+
if not self.popular_packages:
269+
err_msg = "Popular packages file is empty"
263270
logger.warning(err_msg)
264271
return HeuristicResult.SKIP, {"error": err_msg}
265272

266273
package_name = pypi_package_json.component_name
267-
for popular_package in popular_packages:
274+
for popular_package in self.popular_packages:
268275
# If there is a popular packages file, check if the package name is similar to any of them.
269276
if package_name == popular_package:
270277
return HeuristicResult.PASS, {"package_name": package_name}

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -383,6 +383,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
383383
failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}),
384384
forceSetup.
385385
386+
% Package released with a name similar to a popular package.
387+
{Confidence.HIGH.value}::trigger(malware_high_confidence_4) :-
388+
quickUndetailed, forceSetup, failed({Heuristics.TYPOSQUATTING_PRESENCE.value}).
389+
386390
% Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
387391
% the same code.
388392
{Confidence.MEDIUM.value}::trigger(malware_medium_confidence_1) :-
@@ -397,18 +401,13 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
397401
failed({Heuristics.ONE_RELEASE.value}),
398402
failed({Heuristics.ANOMALOUS_VERSION.value}).
399403
400-
% Package released with a name similar to a popular package.
401-
{Confidence.HIGH.value}::trigger(malware_high_confidence_4) :-
402-
quickUndetailed, forceSetup, failed({Heuristics.TYPOSQUATTING_PRESENCE.value}).
403-
404404
% ----- Evaluation -----
405405
406406
% Aggregate result
407407
{problog_result_access} :- trigger(malware_high_confidence_1).
408408
{problog_result_access} :- trigger(malware_high_confidence_2).
409409
{problog_result_access} :- trigger(malware_high_confidence_3).
410410
{problog_result_access} :- trigger(malware_high_confidence_4).
411-
{problog_result_access} :- trigger(malware_high_confidence_5).
412411
{problog_result_access} :- trigger(malware_medium_confidence_2).
413412
{problog_result_access} :- trigger(malware_medium_confidence_1).
414413
query({problog_result_access}).

tests/malware_analyzer/pypi/test_typosquatting_presence.py

Lines changed: 13 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
import pytest
1212

13+
from macaron.errors import HeuristicAnalyzerValueError
1314
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
1415
from macaron.malware_analyzer.pypi_heuristics.metadata.typosquatting_presence import TyposquattingPresenceAnalyzer
1516

@@ -19,9 +20,9 @@ def analyzer(tmp_path: Path) -> TyposquattingPresenceAnalyzer:
1920
"""Pytest fixture to create a TyposquattingPresenceAnalyzer instance with a dummy popular packages file."""
2021
# create a dummy popular packages file
2122
pkg_file = tmp_path / "popular.txt"
22-
pkg_file.write_text("\n".join(["requests", "flask", "pytest"]))
23-
analyzer_instance = TyposquattingPresenceAnalyzer()
24-
analyzer_instance.popular_packages_path = str(pkg_file)
23+
popular_packages = ["requests", "flask", "pytest"]
24+
pkg_file.write_text("\n".join(popular_packages))
25+
analyzer_instance = TyposquattingPresenceAnalyzer(str(pkg_file))
2526
return analyzer_instance
2627

2728

@@ -53,15 +54,11 @@ def test_analyze_unrelated_name_pass(analyzer: TyposquattingPresenceAnalyzer, py
5354
assert info == {"package_name": "launchable"}
5455

5556

56-
def test_analyze_nonexistent_file_skip(pypi_package_json: MagicMock) -> None:
57-
"""Test the analyzer skips if the popular packages file does not exist."""
58-
analyzer = TyposquattingPresenceAnalyzer()
59-
analyzer.popular_packages_path = "/path/does/not/exist.txt"
60-
result, info = analyzer.analyze(pypi_package_json)
61-
assert result == HeuristicResult.SKIP
62-
error_msg = info.get("error")
63-
assert isinstance(error_msg, str)
64-
assert "Popular packages file not found" in error_msg
57+
def test_analyze_nonexistent_file_skip() -> None:
58+
"""Test the analyzer raises an error if the popular packages file does not exist."""
59+
with pytest.raises(HeuristicAnalyzerValueError) as exc_info:
60+
TyposquattingPresenceAnalyzer("nonexistent_file.txt")
61+
assert "Popular packages file not found or path not configured" in str(exc_info.value)
6562

6663

6764
@pytest.mark.parametrize(
@@ -72,19 +69,17 @@ def test_analyze_nonexistent_file_skip(pypi_package_json: MagicMock) -> None:
7269
("abcd", "wxyz", 0.0),
7370
],
7471
)
75-
def test_jaro_distance(s1: str, s2: str, expected: float) -> None:
72+
def test_jaro_distance(analyzer: TyposquattingPresenceAnalyzer, s1: str, s2: str, expected: float) -> None:
7673
"""Test the Jaro distance calculation."""
77-
analyzer = TyposquattingPresenceAnalyzer()
7874
assert analyzer.jaro_distance(s1, s2) == expected
7975

8076

8177
def test_empty_popular_packages_file(tmp_path: Path, pypi_package_json: MagicMock) -> None:
8278
"""Test the analyzer skips when the popular packages file is empty."""
8379
pkg_file = tmp_path / "empty_popular.txt"
84-
pkg_file.write_text("") # Create an empty file
85-
analyzer = TyposquattingPresenceAnalyzer()
86-
analyzer.popular_packages_path = str(pkg_file)
87-
result, info = analyzer.analyze(pypi_package_json)
80+
pkg_file.write_text("")
81+
analyzer_instance = TyposquattingPresenceAnalyzer(str(pkg_file))
82+
result, info = analyzer_instance.analyze(pypi_package_json)
8883
assert result == HeuristicResult.SKIP
8984
error_msg = info.get("error")
9085
assert isinstance(error_msg, str)

0 commit comments

Comments
 (0)