Skip to content

Commit 608f6c9

Browse files
committed
chore(typosquatting): improve variable names and comments for clarity
Signed-off-by: Amine <[email protected]>
1 parent 0ecf040 commit 608f6c9

File tree

2 files changed

+29
-23
lines changed

2 files changed

+29
-23
lines changed

src/macaron/malware_analyzer/pypi_heuristics/metadata/typosquatting_presence.py

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,9 @@ def _load_defaults(self) -> tuple[list[str], float, float, float, float]:
122122
def are_neighbors(self, first_char: str, second_char: str) -> bool:
123123
"""Check if two characters are adjacent on a QWERTY keyboard.
124124
125+
Adjacent characters are those that are next to each other
126+
either horizontally, vertically, or diagonally.
127+
125128
Parameters
126129
----------
127130
first_char : str
@@ -190,18 +193,18 @@ def jaro_distance(self, package_name: str, popular_package_name: str) -> float:
190193
package_name_matches = [False] * package_name_len
191194
popular_package_name_matches = [False] * popular_package_name_len
192195
matches = 0
193-
transpositions = 0.0 # a float to handle partial costs.
196+
transpositions = 0.0 # A float to handle partial costs.
194197

195198
# Count matches.
196-
for i in range(package_name_len):
197-
start = max(0, i - match_distance)
198-
end = min(i + match_distance + 1, popular_package_name_len)
199-
for j in range(start, end):
200-
if popular_package_name_matches[j]:
199+
for first_index in range(package_name_len):
200+
start = max(0, first_index - match_distance)
201+
end = min(first_index + match_distance + 1, popular_package_name_len)
202+
for second_index in range(start, end):
203+
if popular_package_name_matches[second_index]:
201204
continue
202-
if package_name[i] == popular_package_name[j]:
203-
package_name_matches[i] = True
204-
popular_package_name_matches[j] = True
205+
if package_name[first_index] == popular_package_name[second_index]:
206+
package_name_matches[first_index] = True
207+
popular_package_name_matches[second_index] = True
205208
matches += 1
206209
break
207210

@@ -210,12 +213,12 @@ def jaro_distance(self, package_name: str, popular_package_name: str) -> float:
210213

211214
# Count transpositions with possible keyboard awareness.
212215
k = 0
213-
for i in range(package_name_len):
214-
if package_name_matches[i]:
216+
for index in range(package_name_len):
217+
if package_name_matches[index]:
215218
while not popular_package_name_matches[k]:
216219
k += 1
217-
if package_name[i] != popular_package_name[k]:
218-
transpositions += self.substitution_func(package_name[i], popular_package_name[k])
220+
if package_name[index] != popular_package_name[k]:
221+
transpositions += self.substitution_func(package_name[index], popular_package_name[k])
219222
k += 1
220223

221224
transpositions /= 2.0 # Adjust for transpositions being counted twice.

tests/malware_analyzer/pypi/test_typosquatting_presence.py

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
# pylint: disable=redefined-outer-name
66

77

8+
import os
89
from pathlib import Path
910
from unittest.mock import MagicMock
1011

@@ -18,10 +19,10 @@
1819
@pytest.fixture()
1920
def analyzer(tmp_path: Path) -> TyposquattingPresenceAnalyzer:
2021
"""Pytest fixture to create a TyposquattingPresenceAnalyzer instance with a dummy popular packages file."""
21-
# create a dummy popular packages file
22-
pkg_file = tmp_path / "popular.txt"
22+
# Create a dummy popular packages file.
23+
pkg_file = Path(os.path.join(tmp_path, "popular.txt"))
2324
popular_packages = ["requests", "flask", "pytest"]
24-
pkg_file.write_text("\n".join(popular_packages))
25+
pkg_file.write_text("\n".join(popular_packages), encoding="utf-8")
2526
analyzer_instance = TyposquattingPresenceAnalyzer(str(pkg_file))
2627
return analyzer_instance
2728

@@ -41,8 +42,8 @@ def test_analyze_similar_name_fail(analyzer: TyposquattingPresenceAnalyzer, pypi
4142
assert result == HeuristicResult.FAIL
4243
assert info["package_name"] == "reqursts"
4344
assert info["popular_package"] == "requests"
44-
# ratio should match or exceed threshold 0.95
45-
assert isinstance(info["similarity_ratio"], (int, float))
45+
# The ratio should match or exceed threshold.
46+
assert isinstance(info["similarity_ratio"], float)
4647
assert info["similarity_ratio"] >= analyzer.distance_ratio_threshold
4748

4849

@@ -62,22 +63,24 @@ def test_analyze_nonexistent_file_skip() -> None:
6263

6364

6465
@pytest.mark.parametrize(
65-
("s1", "s2", "expected"),
66+
("package1", "package2", "expected_ratio"),
6667
[
6768
("requests", "requests", 1.0),
6869
("reqursts", "requests", 11 / 12),
6970
("abcd", "wxyz", 0.0),
7071
],
7172
)
72-
def test_jaro_distance(analyzer: TyposquattingPresenceAnalyzer, s1: str, s2: str, expected: float) -> None:
73+
def test_jaro_distance(
74+
analyzer: TyposquattingPresenceAnalyzer, package1: str, package2: str, expected_ratio: float
75+
) -> None:
7376
"""Test the Jaro distance calculation."""
74-
assert analyzer.jaro_distance(s1, s2) == expected
77+
assert analyzer.jaro_distance(package1, package2) == expected_ratio
7578

7679

7780
def test_empty_popular_packages_file(tmp_path: Path, pypi_package_json: MagicMock) -> None:
7881
"""Test the analyzer skips when the popular packages file is empty."""
79-
pkg_file = tmp_path / "empty_popular.txt"
80-
pkg_file.write_text("")
82+
pkg_file = Path(os.path.join(tmp_path, "empty_popular.txt"))
83+
pkg_file.write_text("", encoding="utf-8")
8184
analyzer_instance = TyposquattingPresenceAnalyzer(str(pkg_file))
8285
result, info = analyzer_instance.analyze(pypi_package_json)
8386
assert result == HeuristicResult.SKIP

0 commit comments

Comments
 (0)