5
5
import logging
6
6
import os
7
7
8
+ from macaron .config .defaults import defaults
8
9
from macaron .config .global_config import global_config
9
10
from macaron .json_tools import JsonType
10
11
from macaron .malware_analyzer .pypi_heuristics .base_analyzer import BaseHeuristicAnalyzer
17
18
class TyposquattingPresenceAnalyzer (BaseHeuristicAnalyzer ):
18
19
"""Check whether the PyPI package has typosquatting presence."""
19
20
21
# Maps each supported key to its (row, column) position on a QWERTY grid.
# Row 0 is the digit row (plus "-"), rows 1-3 are the three letter rows;
# the column is the key's index within its row.
KEYBOARD_LAYOUT = {
    key: (row_index, col_index)
    for row_index, row_keys in enumerate(("1234567890-", "qwertyuiop", "asdfghjkl", "zxcvbnm"))
    for col_index, key in enumerate(row_keys)
}
60
+
20
61
def __init__(self) -> None:
    """Register the heuristic and populate its tunable settings.

    The settings come from ``_load_defaults``; a popular-packages path set
    on ``global_config`` takes precedence over the loaded one.
    """
    super().__init__(
        name="typosquatting_presence_analyzer",
        heuristic=Heuristics.TYPOSQUATTING_PRESENCE,
        depends_on=None,
    )

    loaded_settings = self._load_defaults()
    (
        self.popular_packages_path,
        self.distance_ratio_threshold,
        self.keyboard,
        self.scaling,
        self.cost,
    ) = loaded_settings

    # An explicitly configured global path wins over the defaults.ini value.
    configured_path = global_config.popular_packages_path
    if configured_path is not None:
        self.popular_packages_path = configured_path
71
71
72
def _load_defaults(self) -> tuple[str, float, float, float, float]:
    """Load the typosquatting heuristic settings from defaults.ini.

    Options are read from the ``heuristic.pypi`` section. An option that is
    missing falls back to its built-in default, and so does every option
    when the section itself is absent.

    Returns
    -------
    tuple[str, float, float, float, float]:
        The popular packages file path, followed by the values of the
        ``distance_ratio_threshold``, ``keyboard``, ``scaling`` and ``cost``
        options, in that order.
    """
    section_name = "heuristic.pypi"
    default_path = os.path.join(global_config.resources_path, "popular_packages.txt")
    # Built-in numeric defaults, declared once so both branches stay in sync.
    default_threshold = 0.95
    default_keyboard = 0.8
    default_scaling = 0.15
    default_cost = 1.0

    if not defaults.has_section(section_name):
        return (
            default_path,
            default_threshold,
            default_keyboard,
            default_scaling,
            default_cost,
        )

    section = defaults[section_name]
    path = section.get("popular_packages_path", default_path)
    # Fall back to default if the path in defaults.ini is empty. The extra
    # falsy check also covers a value-less option, which would come back as
    # None if the parser allows options without values.
    if not path or not path.strip():
        path = default_path
    return (
        path,
        section.getfloat("distance_ratio_threshold", default_threshold),
        section.getfloat("keyboard", default_keyboard),
        section.getfloat("scaling", default_scaling),
        section.getfloat("cost", default_cost),
    )
102
+
72
103
def are_neighbors (self , char1 : str , char2 : str ) -> bool :
73
104
"""Check if two characters are adjacent on a QWERTY keyboard.
74
105
@@ -84,8 +115,8 @@ def are_neighbors(self, char1: str, char2: str) -> bool:
84
115
bool
85
116
True if the characters are neighbors, False otherwise.
86
117
"""
87
- c1 = self .keyboard_layout .get (char1 )
88
- c2 = self .keyboard_layout .get (char2 )
118
+ c1 = self .KEYBOARD_LAYOUT .get (char1 )
119
+ c2 = self .KEYBOARD_LAYOUT .get (char2 )
89
120
if not c1 or not c2 :
90
121
return False
91
122
return (abs (c1 [0 ] - c2 [0 ]) <= 1 ) and (abs (c1 [1 ] - c2 [1 ]) <= 1 )
@@ -213,7 +244,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
213
244
The result and related information collected during the analysis.
214
245
"""
215
246
# If there is a popular packages file, check if the package name is similar to any of them
216
- package_name = pypi_package_json .component_name
217
247
if not self .popular_packages_path or not os .path .exists (self .popular_packages_path ):
218
248
err_msg = f"Popular packages file not found or path not configured: { self .popular_packages_path } "
219
249
logger .warning ("%s. Skipping typosquatting check." , err_msg )
@@ -228,6 +258,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
228
258
logger .error (err_msg )
229
259
return HeuristicResult .SKIP , {"error" : err_msg }
230
260
261
+ package_name = pypi_package_json .component_name
231
262
for popular_package in popular_packages :
232
263
if package_name == popular_package :
233
264
return HeuristicResult .PASS , {"package_name" : package_name }
0 commit comments