Skip to content

Commit f14d71c

Browse files
committed
feat(typosquatting): add tests and move analyzer config to defaults.ini
Added unit tests for typosquatting detection. Analyzer variables, including the file path, are now loaded from defaults.ini. Raised heuristic confidence level from medium to high. BREAKING CHANGE: Analyzer config must now be defined in defaults.ini. Signed-off-by: Amine <[email protected]>
1 parent 69a33bc commit f14d71c

File tree

4 files changed

+170
-51
lines changed

4 files changed

+170
-51
lines changed

src/macaron/config/defaults.ini

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,3 +600,14 @@ major_threshold = 20
600600
epoch_threshold = 3
601601
# The number of days +/- the day of publish the calendar versioning day may be.
602602
day_publish_error = 4
603+
604+
# The threshold ratio for two packages to be considered similar.
605+
distance_ratio_threshold = 0.95
606+
# The cost for two characters that are close to each other on the keyboard.
607+
keyboard = 0.8
608+
# The scaling factor for the Jaro-Winkler distance.
609+
scaling = 0.15
610+
# The cost for two characters that are not close to each other on the keyboard.
611+
cost = 1.0
612+
# The path to the file that contains the list of popular packages. Leave empty to use popular_packages.txt from the resources directory.
613+
popular_packages_path =

src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py

Lines changed: 78 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import os
77

8+
from macaron.config.defaults import defaults
89
from macaron.config.global_config import global_config
910
from macaron.json_tools import JsonType
1011
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
@@ -17,58 +18,88 @@
1718
class TyposquattingPresenceAnalyzer(BaseHeuristicAnalyzer):
1819
"""Check whether the PyPI package has typosquatting presence."""
1920

21+
KEYBOARD_LAYOUT = {
22+
"1": (0, 0),
23+
"2": (0, 1),
24+
"3": (0, 2),
25+
"4": (0, 3),
26+
"5": (0, 4),
27+
"6": (0, 5),
28+
"7": (0, 6),
29+
"8": (0, 7),
30+
"9": (0, 8),
31+
"0": (0, 9),
32+
"-": (0, 10),
33+
"q": (1, 0),
34+
"w": (1, 1),
35+
"e": (1, 2),
36+
"r": (1, 3),
37+
"t": (1, 4),
38+
"y": (1, 5),
39+
"u": (1, 6),
40+
"i": (1, 7),
41+
"o": (1, 8),
42+
"p": (1, 9),
43+
"a": (2, 0),
44+
"s": (2, 1),
45+
"d": (2, 2),
46+
"f": (2, 3),
47+
"g": (2, 4),
48+
"h": (2, 5),
49+
"j": (2, 6),
50+
"k": (2, 7),
51+
"l": (2, 8),
52+
"z": (3, 0),
53+
"x": (3, 1),
54+
"c": (3, 2),
55+
"v": (3, 3),
56+
"b": (3, 4),
57+
"n": (3, 5),
58+
"m": (3, 6),
59+
}
60+
2061
def __init__(self) -> None:
2162
super().__init__(
2263
name="typosquatting_presence_analyzer", heuristic=Heuristics.TYPOSQUATTING_PRESENCE, depends_on=None
2364
)
24-
self.popular_packages_path = os.path.join(global_config.resources_path, "popular_packages.txt")
25-
self.distance_ratio_threshold = 0.95
26-
self.cost = 1
27-
self.scaling = 0.15
28-
self.keyboard = 0.8
29-
self.keyboard_layout = {
30-
"1": (0, 0),
31-
"2": (0, 1),
32-
"3": (0, 2),
33-
"4": (0, 3),
34-
"5": (0, 4),
35-
"6": (0, 5),
36-
"7": (0, 6),
37-
"8": (0, 7),
38-
"9": (0, 8),
39-
"0": (0, 9),
40-
"-": (0, 10),
41-
"q": (1, 0),
42-
"w": (1, 1),
43-
"e": (1, 2),
44-
"r": (1, 3),
45-
"t": (1, 4),
46-
"y": (1, 5),
47-
"u": (1, 6),
48-
"i": (1, 7),
49-
"o": (1, 8),
50-
"p": (1, 9),
51-
"a": (2, 0),
52-
"s": (2, 1),
53-
"d": (2, 2),
54-
"f": (2, 3),
55-
"g": (2, 4),
56-
"h": (2, 5),
57-
"j": (2, 6),
58-
"k": (2, 7),
59-
"l": (2, 8),
60-
"z": (3, 0),
61-
"x": (3, 1),
62-
"c": (3, 2),
63-
"v": (3, 3),
64-
"b": (3, 4),
65-
"n": (3, 5),
66-
"m": (3, 6),
67-
}
65+
self.popular_packages_path, self.distance_ratio_threshold, self.keyboard, self.scaling, self.cost = (
66+
self._load_defaults()
67+
)
6868

6969
if global_config.popular_packages_path is not None:
7070
self.popular_packages_path = global_config.popular_packages_path
7171

72+
def _load_defaults(self) -> tuple[str, float, float, float, float]:
    """Load the typosquatting analyzer settings from defaults.ini.

    Settings are read from the ``heuristic.pypi`` section. If the section is absent, or an
    individual option is missing from it, the built-in fallback for that option is used.
    An empty ``popular_packages_path`` is treated the same as a missing one.

    Returns
    -------
    tuple[str, float, float, float, float]:
        The popular packages file path, the distance ratio threshold, the keyboard cost,
        the scaling factor, and the cost, in that order.
    """
    section_name = "heuristic.pypi"
    default_path = os.path.join(global_config.resources_path, "popular_packages.txt")
    # Single source of truth for the fallbacks, shared by both return paths below.
    default_threshold = 0.95
    default_keyboard = 0.8
    default_scaling = 0.15
    default_cost = 1.0

    if not defaults.has_section(section_name):
        return (default_path, default_threshold, default_keyboard, default_scaling, default_cost)

    section = defaults[section_name]
    path = section.get("popular_packages_path", fallback=default_path)
    # Fall back to default if the path in defaults.ini is empty.
    if not path.strip():
        path = default_path

    return (
        path,
        section.getfloat("distance_ratio_threshold", fallback=default_threshold),
        section.getfloat("keyboard", fallback=default_keyboard),
        section.getfloat("scaling", fallback=default_scaling),
        section.getfloat("cost", fallback=default_cost),
    )
102+
72103
def are_neighbors(self, char1: str, char2: str) -> bool:
73104
"""Check if two characters are adjacent on a QWERTY keyboard.
74105
@@ -84,8 +115,8 @@ def are_neighbors(self, char1: str, char2: str) -> bool:
84115
bool
85116
True if the characters are neighbors, False otherwise.
86117
"""
87-
c1 = self.keyboard_layout.get(char1)
88-
c2 = self.keyboard_layout.get(char2)
118+
c1 = self.KEYBOARD_LAYOUT.get(char1)
119+
c2 = self.KEYBOARD_LAYOUT.get(char2)
89120
if not c1 or not c2:
90121
return False
91122
return (abs(c1[0] - c2[0]) <= 1) and (abs(c1[1] - c2[1]) <= 1)
@@ -213,7 +244,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
213244
The result and related information collected during the analysis.
214245
"""
215246
# If there is a popular packages file, check if the package name is similar to any of them
216-
package_name = pypi_package_json.component_name
217247
if not self.popular_packages_path or not os.path.exists(self.popular_packages_path):
218248
err_msg = f"Popular packages file not found or path not configured: {self.popular_packages_path}"
219249
logger.warning("%s. Skipping typosquatting check.", err_msg)
@@ -228,6 +258,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
228258
logger.error(err_msg)
229259
return HeuristicResult.SKIP, {"error": err_msg}
230260

261+
package_name = pypi_package_json.component_name
231262
for popular_package in popular_packages:
232263
if package_name == popular_package:
233264
return HeuristicResult.PASS, {"package_name": package_name}

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,17 +398,16 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
398398
failed({Heuristics.ANOMALOUS_VERSION.value}).
399399
400400
% Package released with a name similar to a popular package.
401-
{Confidence.MEDIUM.value}::trigger(malware_medium_confidence_3) :-
402-
quickUndetailed,
403-
failed({Heuristics.TYPOSQUATTING_PRESENCE.value}).
401+
{Confidence.HIGH.value}::trigger(malware_high_confidence_4) :-
402+
quickUndetailed, forceSetup, failed({Heuristics.TYPOSQUATTING_PRESENCE.value}).
404403
405404
% ----- Evaluation -----
406405
407406
% Aggregate result
408407
{problog_result_access} :- trigger(malware_high_confidence_1).
409408
{problog_result_access} :- trigger(malware_high_confidence_2).
410409
{problog_result_access} :- trigger(malware_high_confidence_3).
411-
{problog_result_access} :- trigger(malware_medium_confidence_3).
410+
{problog_result_access} :- trigger(malware_high_confidence_4).
412411
{problog_result_access} :- trigger(malware_medium_confidence_2).
413412
{problog_result_access} :- trigger(malware_medium_confidence_1).
414413
query({problog_result_access}).
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""Tests for the TyposquattingPresenceAnalyzer heuristic."""
5+
# pylint: disable=redefined-outer-name
6+
7+
8+
from pathlib import Path
9+
from unittest.mock import MagicMock
10+
11+
import pytest
12+
13+
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
14+
from macaron.malware_analyzer.pypi_heuristics.metadata.typosquatting_presence import TyposquattingPresenceAnalyzer
15+
16+
17+
@pytest.fixture()
def analyzer(tmp_path: Path) -> TyposquattingPresenceAnalyzer:
    """Provide an analyzer whose popular packages list is a small temporary file."""
    popular_names = ["requests", "flask", "pytest"]
    packages_file = tmp_path / "popular.txt"
    packages_file.write_text("\n".join(popular_names))

    # Point a freshly built analyzer at the temporary list instead of the configured one.
    instance = TyposquattingPresenceAnalyzer()
    instance.popular_packages_path = str(packages_file)
    return instance
26+
27+
28+
def test_analyze_exact_match_pass(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None:
    """Check that a name identical to a popular package is reported as PASS."""
    popular_name = "requests"
    pypi_package_json.component_name = popular_name

    verdict, details = analyzer.analyze(pypi_package_json)

    assert verdict == HeuristicResult.PASS
    assert details == {"package_name": popular_name}
34+
35+
36+
def test_analyze_similar_name_fail(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None:
    """Check that a name suspiciously close to a popular package is reported as FAIL."""
    suspicious_name = "reqursts"
    pypi_package_json.component_name = suspicious_name

    verdict, details = analyzer.analyze(pypi_package_json)

    assert verdict == HeuristicResult.FAIL
    assert details["package_name"] == suspicious_name
    assert details["popular_package"] == "requests"
    # The reported ratio must be numeric and reach the analyzer's configured threshold.
    ratio = details["similarity_ratio"]
    assert isinstance(ratio, (int, float))
    assert ratio >= analyzer.distance_ratio_threshold
46+
47+
48+
def test_analyze_unrelated_name_pass(analyzer: TyposquattingPresenceAnalyzer, pypi_package_json: MagicMock) -> None:
    """Check that a name unlike every popular package is reported as PASS."""
    unrelated_name = "launchable"
    pypi_package_json.component_name = unrelated_name

    verdict, details = analyzer.analyze(pypi_package_json)

    assert verdict == HeuristicResult.PASS
    assert details == {"package_name": unrelated_name}
54+
55+
56+
def test_analyze_nonexistent_file_skip(pypi_package_json: MagicMock) -> None:
    """Check that the analyzer reports SKIP when the popular packages file is missing."""
    missing_file_analyzer = TyposquattingPresenceAnalyzer()
    missing_file_analyzer.popular_packages_path = "/path/does/not/exist.txt"

    verdict, details = missing_file_analyzer.analyze(pypi_package_json)

    assert verdict == HeuristicResult.SKIP
    # The skip reason is returned under the "error" key as a plain string.
    message = details.get("error")
    assert isinstance(message, str)
    assert "Popular packages file not found" in message
65+
66+
67+
@pytest.mark.parametrize(
    ("s1", "s2", "expected"),
    [
        ("requests", "requests", 1.0),
        ("reqursts", "requests", 11 / 12),
        ("abcd", "wxyz", 0.0),
    ],
)
def test_jaro_distance(s1: str, s2: str, expected: float) -> None:
    """Test the Jaro distance calculation.

    The result is a float built from several divisions, so it is compared with a tolerance
    rather than exact equality; a value such as 11 / 12 has no exact binary representation.
    """
    typosquatting_analyzer = TyposquattingPresenceAnalyzer()
    assert typosquatting_analyzer.jaro_distance(s1, s2) == pytest.approx(expected)

0 commit comments

Comments
 (0)