@@ -66,23 +66,21 @@ def __init__(self) -> None:
66
66
self ._load_defaults ()
67
67
)
68
68
69
- if global_config .popular_packages_path is not None :
70
- self .popular_packages_path = global_config .popular_packages_path
71
-
72
69
def _load_defaults (self ) -> tuple [str , float , float , float , float ]:
73
70
"""Load default settings from defaults.ini.
74
71
75
72
Returns
76
73
-------
77
74
tuple[str, float, float, float, float]:
78
- The Major threshold, Epoch threshold, and Day published error.
75
+ The path to the popular packages file, distance ratio threshold,
76
+ keyboard awareness factor, scaling factor, and cost factor.
79
77
"""
80
78
section_name = "heuristic.pypi"
81
79
default_path = os .path .join (global_config .resources_path , "popular_packages.txt" )
82
80
if defaults .has_section (section_name ):
83
81
section = defaults [section_name ]
84
82
path = section .get ("popular_packages_path" , default_path )
85
- # Fall back to default if the path in defaults.ini is empty
83
+ # Fall back to default if the path in defaults.ini is empty.
86
84
if not path .strip ():
87
85
path = default_path
88
86
return (
@@ -100,35 +98,35 @@ def _load_defaults(self) -> tuple[str, float, float, float, float]:
100
98
1.0 ,
101
99
)
102
100
103
def are_neighbors(self, first_char: str, second_char: str) -> bool:
    """Check if two characters are adjacent on a QWERTY keyboard.

    Each character is looked up in ``self.KEYBOARD_LAYOUT``. The two characters are
    treated as neighbors when the first components of their coordinates differ by at
    most one and the second components also differ by at most one.

    Parameters
    ----------
    first_char : str
        The first character.
    second_char : str
        The second character.

    Returns
    -------
    bool
        True if the characters are neighbors, False otherwise. False is also returned
        when either character has no (or an empty) entry in the layout.
    """
    coordinates1 = self.KEYBOARD_LAYOUT.get(first_char)
    coordinates2 = self.KEYBOARD_LAYOUT.get(second_char)
    # A character without a usable layout entry cannot be adjacent to anything.
    if not coordinates1 or not coordinates2:
        return False
    return (
        abs(coordinates1[0] - coordinates2[0]) <= 1
        and abs(coordinates1[1] - coordinates2[1]) <= 1
    )
123
121
124
- def substitution_func (self , char1 : str , char2 : str ) -> float :
122
+ def substitution_func (self , first_char : str , second_char : str ) -> float :
125
123
"""Calculate the substitution cost between two characters.
126
124
127
125
Parameters
128
126
----------
129
- char1 : str
127
+ first_char : str
130
128
The first character.
131
- char2 : str
129
+ second_char : str
132
130
The second character.
133
131
134
132
Returns
@@ -137,9 +135,9 @@ def substitution_func(self, char1: str, char2: str) -> float:
137
135
0.0 if the characters are the same, `self.keyboard` if they are
138
136
neighbors on a QWERTY keyboard, otherwise `self.cost` .
139
137
"""
140
- if char1 == char2 :
138
+ if first_char == second_char :
141
139
return 0.0
142
- if self .keyboard and self .are_neighbors (char1 , char2 ):
140
+ if self .keyboard and self .are_neighbors (first_char , second_char ):
143
141
return self .keyboard
144
142
return self .cost
145
143
@@ -161,21 +159,22 @@ def jaro_distance(self, package_name: str, popular_package_name: str) -> float:
161
159
if package_name == popular_package_name :
162
160
return 1.0
163
161
164
- len1 , len2 = len (package_name ), len (popular_package_name )
165
- if len1 == 0 or len2 == 0 :
162
+ package_name_len = len (package_name )
163
+ popular_package_name_len = len (popular_package_name )
164
+ if package_name_len == 0 or popular_package_name_len == 0 :
166
165
return 0.0
167
166
168
- match_distance = max (len1 , len2 ) // 2 - 1
167
+ match_distance = max (package_name_len , popular_package_name_len ) // 2 - 1
169
168
170
- package_name_matches = [False ] * len1
171
- popular_package_name_matches = [False ] * len2
169
+ package_name_matches = [False ] * package_name_len
170
+ popular_package_name_matches = [False ] * popular_package_name_len
172
171
matches = 0
173
- transpositions = 0.0 # Now a float to handle partial costs
172
+ transpositions = 0.0 # a float to handle partial costs.
174
173
175
- # Count matches
176
- for i in range (len1 ):
174
+ # Count matches.
175
+ for i in range (package_name_len ):
177
176
start = max (0 , i - match_distance )
178
- end = min (i + match_distance + 1 , len2 )
177
+ end = min (i + match_distance + 1 , popular_package_name_len )
179
178
for j in range (start , end ):
180
179
if popular_package_name_matches [j ]:
181
180
continue
@@ -188,19 +187,21 @@ def jaro_distance(self, package_name: str, popular_package_name: str) -> float:
188
187
if matches == 0 :
189
188
return 0.0
190
189
191
- # Count transpositions with possible keyboard awareness
190
+ # Count transpositions with possible keyboard awareness.
192
191
k = 0
193
- for i in range (len1 ):
192
+ for i in range (package_name_len ):
194
193
if package_name_matches [i ]:
195
194
while not popular_package_name_matches [k ]:
196
195
k += 1
197
196
if package_name [i ] != popular_package_name [k ]:
198
197
transpositions += self .substitution_func (package_name [i ], popular_package_name [k ])
199
198
k += 1
200
199
201
- transpositions /= 2.0 # Adjust for transpositions being counted twice
200
+ transpositions /= 2.0 # Adjust for transpositions being counted twice.
202
201
203
- return (matches / len1 + matches / len2 + (matches - transpositions ) / matches ) / 3.0
202
+ return (
203
+ matches / package_name_len + matches / popular_package_name_len + (matches - transpositions ) / matches
204
+ ) / 3.0
204
205
205
206
def ratio (self , package_name : str , popular_package_name : str ) -> float :
206
207
"""Calculate the Jaro-Winkler distance ratio.
@@ -243,7 +244,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
243
244
tuple[HeuristicResult, dict[str, JsonType]]:
244
245
The result and related information collected during the analysis.
245
246
"""
246
- # If there is a popular packages file, check if the package name is similar to any of them
247
247
if not self .popular_packages_path or not os .path .exists (self .popular_packages_path ):
248
248
err_msg = f"Popular packages file not found or path not configured: { self .popular_packages_path } "
249
249
logger .warning ("%s. Skipping typosquatting check." , err_msg )
@@ -253,13 +253,19 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
253
253
try :
254
254
with open (self .popular_packages_path , encoding = "utf-8" ) as file :
255
255
popular_packages = file .read ().splitlines ()
256
- except OSError as e :
257
- err_msg = f"Could not read popular packages file { self .popular_packages_path } : { e } "
256
+ except OSError as exception :
257
+ err_msg = f"Could not read popular packages file { self .popular_packages_path } : { exception } "
258
258
logger .error (err_msg )
259
259
return HeuristicResult .SKIP , {"error" : err_msg }
260
260
261
+ if not popular_packages :
262
+ err_msg = f"Popular packages file is empty: { self .popular_packages_path } "
263
+ logger .warning (err_msg )
264
+ return HeuristicResult .SKIP , {"error" : err_msg }
265
+
261
266
package_name = pypi_package_json .component_name
262
267
for popular_package in popular_packages :
268
+ # If there is a popular packages file, check if the package name is similar to any of them.
263
269
if package_name == popular_package :
264
270
return HeuristicResult .PASS , {"package_name" : package_name }
265
271
0 commit comments