|
7 | 7 |
|
8 | 8 | from macaron.config.defaults import defaults
|
9 | 9 | from macaron.config.global_config import global_config
|
| 10 | +from macaron.errors import HeuristicAnalyzerValueError |
10 | 11 | from macaron.json_tools import JsonType
|
11 | 12 | from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
|
12 | 13 | from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
|
@@ -58,44 +59,64 @@ class TyposquattingPresenceAnalyzer(BaseHeuristicAnalyzer):
|
58 | 59 | "m": (3, 6),
|
59 | 60 | }
|
60 | 61 |
|
61 |
| - def __init__(self) -> None: |
| 62 | + def __init__(self, popular_packages_path: str | None = None) -> None: |
62 | 63 | super().__init__(
|
63 | 64 | name="typosquatting_presence_analyzer", heuristic=Heuristics.TYPOSQUATTING_PRESENCE, depends_on=None
|
64 | 65 | )
|
65 |
| - self.popular_packages_path, self.distance_ratio_threshold, self.keyboard, self.scaling, self.cost = ( |
| 66 | + self.default_path = os.path.join(global_config.resources_path, "popular_packages.txt") |
| 67 | + if popular_packages_path: |
| 68 | + self.default_path = popular_packages_path |
| 69 | + self.popular_packages, self.distance_ratio_threshold, self.keyboard, self.scaling, self.cost = ( |
66 | 70 | self._load_defaults()
|
67 | 71 | )
|
68 | 72 |
|
69 |
| - def _load_defaults(self) -> tuple[str, float, float, float, float]: |
| 73 | + def _load_defaults(self) -> tuple[list[str], float, float, float, float]: |
70 | 74 | """Load default settings from defaults.ini.
|
71 | 75 |
|
72 | 76 | Returns
|
73 | 77 | -------
|
74 |
| - tuple[str, float, float, float, float]: |
75 |
| - The path to the popular packages file, distance ratio threshold, |
| 78 | + tuple[list[str], float, float, float, float]: |
| 79 | + The popular packages list, distance ratio threshold, |
76 | 80 | keyboard awareness factor, scaling factor, and cost factor.
|
77 | 81 | """
|
78 | 82 | section_name = "heuristic.pypi"
|
79 |
| - default_path = os.path.join(global_config.resources_path, "popular_packages.txt") |
| 83 | + path = self.default_path |
| 84 | + distance_ratio_threshold = 0.95 |
| 85 | + keyboard = 0.8 |
| 86 | + scaling = 0.15 |
| 87 | + cost = 1.0 |
| 88 | + |
80 | 89 | if defaults.has_section(section_name):
|
81 | 90 | section = defaults[section_name]
|
82 |
| - path = section.get("popular_packages_path", default_path) |
| 91 | + path_from_config = section.get("popular_packages_path", self.default_path) |
83 | 92 | # Fall back to default if the path in defaults.ini is empty.
|
84 |
| - if not path.strip(): |
85 |
| - path = default_path |
86 |
| - return ( |
87 |
| - path, |
88 |
| - section.getfloat("distance_ratio_threshold", 0.95), |
89 |
| - section.getfloat("keyboard", 0.8), |
90 |
| - section.getfloat("scaling", 0.15), |
91 |
| - section.getfloat("cost", 1.0), |
92 |
| - ) |
| 93 | + if path_from_config.strip(): |
| 94 | + path = path_from_config |
| 95 | + distance_ratio_threshold = section.getfloat("distance_ratio_threshold", 0.95) |
| 96 | + keyboard = section.getfloat("keyboard", 0.8) |
| 97 | + scaling = section.getfloat("scaling", 0.15) |
| 98 | + cost = section.getfloat("cost", 1.0) |
| 99 | + |
| 100 | + if not path or not os.path.exists(path): |
| 101 | + err_msg = "Popular packages file not found or path not configured" |
| 102 | + logger.debug(err_msg) |
| 103 | + raise HeuristicAnalyzerValueError(err_msg) |
| 104 | + |
| 105 | + popular_packages_list = [] |
| 106 | + try: |
| 107 | + with open(path, encoding="utf-8") as file: |
| 108 | + popular_packages_list = file.read().splitlines() |
| 109 | + except OSError as error: |
| 110 | + err_msg = "Could not read popular packages file" |
| 111 | + logger.debug(err_msg) |
| 112 | + raise HeuristicAnalyzerValueError(err_msg) from error |
| 113 | + |
93 | 114 | return (
|
94 |
| - default_path, |
95 |
| - 0.95, |
96 |
| - 0.8, |
97 |
| - 0.15, |
98 |
| - 1.0, |
| 115 | + popular_packages_list, |
| 116 | + distance_ratio_threshold, |
| 117 | + keyboard, |
| 118 | + scaling, |
| 119 | + cost, |
99 | 120 | )
|
100 | 121 |
|
101 | 122 | def are_neighbors(self, first_char: str, second_char: str) -> bool:
|
@@ -244,27 +265,13 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
|
244 | 265 | tuple[HeuristicResult, dict[str, JsonType]]:
|
245 | 266 | The result and related information collected during the analysis.
|
246 | 267 | """
|
247 |
| - if not self.popular_packages_path or not os.path.exists(self.popular_packages_path): |
248 |
| - err_msg = f"Popular packages file not found or path not configured: {self.popular_packages_path}" |
249 |
| - logger.warning("%s. Skipping typosquatting check.", err_msg) |
250 |
| - return HeuristicResult.SKIP, {"error": err_msg} |
251 |
| - |
252 |
| - popular_packages = [] |
253 |
| - try: |
254 |
| - with open(self.popular_packages_path, encoding="utf-8") as file: |
255 |
| - popular_packages = file.read().splitlines() |
256 |
| - except OSError as exception: |
257 |
| - err_msg = f"Could not read popular packages file {self.popular_packages_path}: {exception}" |
258 |
| - logger.error(err_msg) |
259 |
| - return HeuristicResult.SKIP, {"error": err_msg} |
260 |
| - |
261 |
| - if not popular_packages: |
262 |
| - err_msg = f"Popular packages file is empty: {self.popular_packages_path}" |
| 268 | + if not self.popular_packages: |
| 269 | + err_msg = "Popular packages file is empty" |
263 | 270 | logger.warning(err_msg)
|
264 | 271 | return HeuristicResult.SKIP, {"error": err_msg}
|
265 | 272 |
|
266 | 273 | package_name = pypi_package_json.component_name
|
267 |
| - for popular_package in popular_packages: |
| 274 | + for popular_package in self.popular_packages: |
268 | 275 | # If there is a popular packages file, check if the package name is similar to any of them.
|
269 | 276 | if package_name == popular_package:
|
270 | 277 | return HeuristicResult.PASS, {"package_name": package_name}
|
|
0 commit comments