Skip to content

Commit 80afd9e

Browse files
committed
chore(typosquatting): improve variable names and comments for clarity
Signed-off-by: Amine <[email protected]>
1 parent f14d71c commit 80afd9e

File tree

10 files changed

+53
-49
lines changed

10 files changed

+53
-49
lines changed

.DS_Store

6 KB
Binary file not shown.

src/macaron/__main__.py

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -367,16 +367,6 @@ def main(argv: list[str] | None = None) -> None:
367367
help="The directory where Macaron looks for already cloned repositories.",
368368
)
369369

370-
main_parser.add_argument(
371-
"-pp",
372-
"--popular-packages-path",
373-
required=False,
374-
type=str,
375-
default=None,
376-
help="The path to the popular packages file used for typosquatting detection.",
377-
dest="popular_packages_path",
378-
)
379-
380370
# Add sub parsers for each action.
381371
sub_parser = main_parser.add_subparsers(dest="action", help="Run macaron <action> --help for help")
382372

@@ -589,7 +579,6 @@ def main(argv: list[str] | None = None) -> None:
589579
build_log_path=os.path.join(args.output_dir, "build_log"),
590580
debug_level=log_level,
591581
local_repos_path=args.local_repos_path,
592-
popular_packages_path=args.popular_packages_path,
593582
resources_path=os.path.join(macaron.MACARON_PATH, "resources"),
594583
)
595584

src/macaron/config/global_config.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,6 @@ class GlobalConfig:
4949
#: The path to the local .m2 Maven repository. This attribute is None if there is no available .m2 directory.
5050
local_maven_repo: str | None = None
5151

52-
#: The path to the popular packages file.
53-
popular_packages_path: str | None = None
54-
5552
def load(
5653
self,
5754
macaron_path: str,
@@ -60,7 +57,6 @@ def load(
6057
debug_level: int,
6158
local_repos_path: str,
6259
resources_path: str,
63-
popular_packages_path: str,
6460
) -> None:
6561
"""Initiate the GlobalConfig object.
6662
@@ -85,7 +81,6 @@ def load(
8581
self.debug_level = debug_level
8682
self.local_repos_path = local_repos_path
8783
self.resources_path = resources_path
88-
self.popular_packages_path = popular_packages_path
8984

9085
def load_expectation_files(self, exp_path: str) -> None:
9186
"""
Binary file not shown.

src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py

Lines changed: 39 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -66,23 +66,21 @@ def __init__(self) -> None:
6666
self._load_defaults()
6767
)
6868

69-
if global_config.popular_packages_path is not None:
70-
self.popular_packages_path = global_config.popular_packages_path
71-
7269
def _load_defaults(self) -> tuple[str, float, float, float, float]:
7370
"""Load default settings from defaults.ini.
7471
7572
Returns
7673
-------
7774
tuple[str, float, float, float, float]:
78-
The Major threshold, Epoch threshold, and Day published error.
75+
The path to the popular packages file, distance ratio threshold,
76+
keyboard awareness factor, scaling factor, and cost factor.
7977
"""
8078
section_name = "heuristic.pypi"
8179
default_path = os.path.join(global_config.resources_path, "popular_packages.txt")
8280
if defaults.has_section(section_name):
8381
section = defaults[section_name]
8482
path = section.get("popular_packages_path", default_path)
85-
# Fall back to default if the path in defaults.ini is empty
83+
# Fall back to default if the path in defaults.ini is empty.
8684
if not path.strip():
8785
path = default_path
8886
return (
@@ -100,35 +98,35 @@ def _load_defaults(self) -> tuple[str, float, float, float, float]:
10098
1.0,
10199
)
102100

103-
def are_neighbors(self, char1: str, char2: str) -> bool:
101+
def are_neighbors(self, first_char: str, second_char: str) -> bool:
104102
"""Check if two characters are adjacent on a QWERTY keyboard.
105103
106104
Parameters
107105
----------
108-
char1 : str
106+
first_char : str
109107
The first character.
110-
char2 : str
108+
second_char : str
111109
The second character.
112110
113111
Returns
114112
-------
115113
bool
116114
True if the characters are neighbors, False otherwise.
117115
"""
118-
c1 = self.KEYBOARD_LAYOUT.get(char1)
119-
c2 = self.KEYBOARD_LAYOUT.get(char2)
120-
if not c1 or not c2:
116+
coordinates1 = self.KEYBOARD_LAYOUT.get(first_char)
117+
coordinates2 = self.KEYBOARD_LAYOUT.get(second_char)
118+
if not coordinates1 or not coordinates2:
121119
return False
122-
return (abs(c1[0] - c2[0]) <= 1) and (abs(c1[1] - c2[1]) <= 1)
120+
return (abs(coordinates1[0] - coordinates2[0]) <= 1) and (abs(coordinates1[1] - coordinates2[1]) <= 1)
123121

124-
def substitution_func(self, char1: str, char2: str) -> float:
122+
def substitution_func(self, first_char: str, second_char: str) -> float:
125123
"""Calculate the substitution cost between two characters.
126124
127125
Parameters
128126
----------
129-
char1 : str
127+
first_char : str
130128
The first character.
131-
char2 : str
129+
second_char : str
132130
The second character.
133131
134132
Returns
@@ -137,9 +135,9 @@ def substitution_func(self, char1: str, char2: str) -> float:
137135
0.0 if the characters are the same, `self.keyboard` if they are
138136
neighbors on a QWERTY keyboard, otherwise `self.cost` .
139137
"""
140-
if char1 == char2:
138+
if first_char == second_char:
141139
return 0.0
142-
if self.keyboard and self.are_neighbors(char1, char2):
140+
if self.keyboard and self.are_neighbors(first_char, second_char):
143141
return self.keyboard
144142
return self.cost
145143

@@ -161,21 +159,22 @@ def jaro_distance(self, package_name: str, popular_package_name: str) -> float:
161159
if package_name == popular_package_name:
162160
return 1.0
163161

164-
len1, len2 = len(package_name), len(popular_package_name)
165-
if len1 == 0 or len2 == 0:
162+
package_name_len = len(package_name)
163+
popular_package_name_len = len(popular_package_name)
164+
if package_name_len == 0 or popular_package_name_len == 0:
166165
return 0.0
167166

168-
match_distance = max(len1, len2) // 2 - 1
167+
match_distance = max(package_name_len, popular_package_name_len) // 2 - 1
169168

170-
package_name_matches = [False] * len1
171-
popular_package_name_matches = [False] * len2
169+
package_name_matches = [False] * package_name_len
170+
popular_package_name_matches = [False] * popular_package_name_len
172171
matches = 0
173-
transpositions = 0.0 # Now a float to handle partial costs
172+
transpositions = 0.0 # a float to handle partial costs.
174173

175-
# Count matches
176-
for i in range(len1):
174+
# Count matches.
175+
for i in range(package_name_len):
177176
start = max(0, i - match_distance)
178-
end = min(i + match_distance + 1, len2)
177+
end = min(i + match_distance + 1, popular_package_name_len)
179178
for j in range(start, end):
180179
if popular_package_name_matches[j]:
181180
continue
@@ -188,19 +187,21 @@ def jaro_distance(self, package_name: str, popular_package_name: str) -> float:
188187
if matches == 0:
189188
return 0.0
190189

191-
# Count transpositions with possible keyboard awareness
190+
# Count transpositions with possible keyboard awareness.
192191
k = 0
193-
for i in range(len1):
192+
for i in range(package_name_len):
194193
if package_name_matches[i]:
195194
while not popular_package_name_matches[k]:
196195
k += 1
197196
if package_name[i] != popular_package_name[k]:
198197
transpositions += self.substitution_func(package_name[i], popular_package_name[k])
199198
k += 1
200199

201-
transpositions /= 2.0 # Adjust for transpositions being counted twice
200+
transpositions /= 2.0 # Adjust for transpositions being counted twice.
202201

203-
return (matches / len1 + matches / len2 + (matches - transpositions) / matches) / 3.0
202+
return (
203+
matches / package_name_len + matches / popular_package_name_len + (matches - transpositions) / matches
204+
) / 3.0
204205

205206
def ratio(self, package_name: str, popular_package_name: str) -> float:
206207
"""Calculate the Jaro-Winkler distance ratio.
@@ -243,7 +244,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
243244
tuple[HeuristicResult, dict[str, JsonType]]:
244245
The result and related information collected during the analysis.
245246
"""
246-
# If there is a popular packages file, check if the package name is similar to any of them
247247
if not self.popular_packages_path or not os.path.exists(self.popular_packages_path):
248248
err_msg = f"Popular packages file not found or path not configured: {self.popular_packages_path}"
249249
logger.warning("%s. Skipping typosquatting check.", err_msg)
@@ -253,13 +253,19 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
253253
try:
254254
with open(self.popular_packages_path, encoding="utf-8") as file:
255255
popular_packages = file.read().splitlines()
256-
except OSError as e:
257-
err_msg = f"Could not read popular packages file {self.popular_packages_path}: {e}"
256+
except OSError as exception:
257+
err_msg = f"Could not read popular packages file {self.popular_packages_path}: {exception}"
258258
logger.error(err_msg)
259259
return HeuristicResult.SKIP, {"error": err_msg}
260260

261+
if not popular_packages:
262+
err_msg = f"Popular packages file is empty: {self.popular_packages_path}"
263+
logger.warning(err_msg)
264+
return HeuristicResult.SKIP, {"error": err_msg}
265+
261266
package_name = pypi_package_json.component_name
262267
for popular_package in popular_packages:
268+
# If there is a popular packages file, check if the package name is similar to any of them.
263269
if package_name == popular_package:
264270
return HeuristicResult.PASS, {"package_name": package_name}
265271

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -408,6 +408,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
408408
{problog_result_access} :- trigger(malware_high_confidence_2).
409409
{problog_result_access} :- trigger(malware_high_confidence_3).
410410
{problog_result_access} :- trigger(malware_high_confidence_4).
411+
{problog_result_access} :- trigger(malware_high_confidence_5).
411412
{problog_result_access} :- trigger(malware_medium_confidence_2).
412413
{problog_result_access} :- trigger(malware_medium_confidence_1).
413414
query({problog_result_access}).

tests/.DS_Store

6 KB
Binary file not shown.

tests/malware_analyzer/pypi/test_typosquatting_presence.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,3 +76,16 @@ def test_jaro_distance(s1: str, s2: str, expected: float) -> None:
7676
"""Test the Jaro distance calculation."""
7777
analyzer = TyposquattingPresenceAnalyzer()
7878
assert analyzer.jaro_distance(s1, s2) == expected
79+
80+
81+
def test_empty_popular_packages_file(tmp_path: Path, pypi_package_json: MagicMock) -> None:
82+
"""Test the analyzer skips when the popular packages file is empty."""
83+
pkg_file = tmp_path / "empty_popular.txt"
84+
pkg_file.write_text("") # Create an empty file
85+
analyzer = TyposquattingPresenceAnalyzer()
86+
analyzer.popular_packages_path = str(pkg_file)
87+
result, info = analyzer.analyze(pypi_package_json)
88+
assert result == HeuristicResult.SKIP
89+
error_msg = info.get("error")
90+
assert isinstance(error_msg, str)
91+
assert "Popular packages file is empty" in error_msg

tests/parsers/.DS_Store

6 KB
Binary file not shown.

tests/slsa_analyzer/.DS_Store

8 KB
Binary file not shown.

0 commit comments

Comments
 (0)