Skip to content

Commit 0e8f909

Browse files
committed
CM-52497 - Fix SCA folder excluder
1 parent d0e9a8e commit 0e8f909

File tree

3 files changed

+123
-6
lines changed

3 files changed

+123
-6
lines changed

cycode/cli/consts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
'conan.lock',
106106
)
107107

108-
SCA_EXCLUDED_PATHS = (
108+
SCA_EXCLUDED_FOLDER_IN_PATH = (
109109
'node_modules',
110110
'venv',
111111
'.venv',

cycode/cli/files_collector/file_excluder.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pathlib import Path
12
from typing import TYPE_CHECKING
23

34
from cycode.cli import consts
@@ -40,11 +41,13 @@ def _does_document_exceed_max_size_limit(content: str) -> bool:
4041

4142

4243
def _is_file_relevant_for_sca_scan(filename: str) -> bool:
43-
if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS):
44-
logger.debug(
45-
'The file is irrelevant because it is from the inner path of node_modules, %s', {'filename': filename}
46-
)
47-
return False
44+
for part in Path(filename).parts:
45+
if part in consts.SCA_EXCLUDED_FOLDER_IN_PATH:
46+
logger.debug(
47+
'The file is irrelevant because it is from an excluded directory, %s',
48+
{'filename': filename, 'excluded_directory': part},
49+
)
50+
return False
4851

4952
return True
5053

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import pytest
2+
3+
from cycode.cli import consts
4+
from cycode.cli.files_collector.file_excluder import _is_file_relevant_for_sca_scan
5+
6+
7+
class TestIsFileRelevantForScaScan:
8+
"""Test the SCA path exclusion logic."""
9+
10+
def test_files_in_excluded_directories_should_be_excluded(self) -> None:
11+
"""Test that files inside excluded directories are properly excluded."""
12+
13+
# Test node_modules exclusion
14+
assert _is_file_relevant_for_sca_scan('project/node_modules/package/index.js') is False
15+
assert _is_file_relevant_for_sca_scan('/project/node_modules/package.json') is False
16+
assert _is_file_relevant_for_sca_scan('deep/nested/node_modules/lib/file.txt') is False
17+
18+
# Test .gradle exclusion
19+
assert _is_file_relevant_for_sca_scan('project/.gradle/wrapper/gradle-wrapper.jar') is False
20+
assert _is_file_relevant_for_sca_scan('/home/user/.gradle/caches/modules.xml') is False
21+
22+
# Test venv exclusion
23+
assert _is_file_relevant_for_sca_scan('project/venv/lib/python3.8/site-packages/module.py') is False
24+
assert _is_file_relevant_for_sca_scan('/home/user/venv/bin/activate') is False
25+
26+
# Test __pycache__ exclusion
27+
assert _is_file_relevant_for_sca_scan('src/__pycache__/module.cpython-38.pyc') is False
28+
assert _is_file_relevant_for_sca_scan('project/utils/__pycache__/helper.pyc') is False
29+
30+
def test_files_with_excluded_names_in_filename_should_be_included(self) -> None:
31+
"""Test that files containing excluded directory names in their filename are NOT excluded."""
32+
33+
# These should be INCLUDED because the excluded terms appear only in the filename, not as a directory component of the path
34+
assert _is_file_relevant_for_sca_scan('project/build.gradle') is True
35+
assert _is_file_relevant_for_sca_scan('project/gradlew') is True
36+
assert _is_file_relevant_for_sca_scan('app/node_modules_backup.txt') is True
37+
assert _is_file_relevant_for_sca_scan('src/venv_setup.py') is True
38+
assert _is_file_relevant_for_sca_scan('utils/pycache_cleaner.py') is True
39+
assert _is_file_relevant_for_sca_scan('config/gradle_config.xml') is True
40+
41+
def test_files_in_regular_directories_should_be_included(self) -> None:
42+
"""Test that files in regular directories (not excluded) are included."""
43+
44+
assert _is_file_relevant_for_sca_scan('project/src/main.py') is True
45+
assert _is_file_relevant_for_sca_scan('app/components/button.tsx') is True
46+
assert _is_file_relevant_for_sca_scan('/home/user/project/package.json') is True
47+
assert _is_file_relevant_for_sca_scan('build/dist/app.js') is True
48+
assert _is_file_relevant_for_sca_scan('tests/unit/test_utils.py') is True
49+
50+
def test_multiple_excluded_directories_in_path(self) -> None:
51+
"""Test paths that contain multiple excluded directories."""
52+
53+
# Should be excluded if ANY directory in the path is excluded
54+
assert _is_file_relevant_for_sca_scan('project/venv/lib/node_modules/package.json') is False
55+
assert _is_file_relevant_for_sca_scan('app/node_modules/dep/.gradle/build.xml') is False
56+
assert _is_file_relevant_for_sca_scan('src/__pycache__/nested/venv/file.py') is False
57+
58+
def test_absolute_vs_relative_paths(self) -> None:
59+
"""Test both absolute and relative path formats."""
60+
61+
# Relative paths
62+
assert _is_file_relevant_for_sca_scan('node_modules/package.json') is False
63+
assert _is_file_relevant_for_sca_scan('src/app.py') is True
64+
65+
# Absolute paths
66+
assert _is_file_relevant_for_sca_scan('/home/user/project/node_modules/lib.js') is False
67+
assert _is_file_relevant_for_sca_scan('/home/user/project/src/main.py') is True
68+
69+
def test_edge_cases(self) -> None:
70+
"""Test edge cases and boundary conditions."""
71+
72+
# Empty string should be considered relevant (no path to exclude)
73+
assert _is_file_relevant_for_sca_scan('') is True
74+
# Single filename without a directory
75+
assert _is_file_relevant_for_sca_scan('package.json') is True
76+
# Root-level excluded directory
77+
assert _is_file_relevant_for_sca_scan('/node_modules/package.json') is False
78+
# Excluded directory name embedded in the filename, while the file itself sits in an allowed directory
79+
assert _is_file_relevant_for_sca_scan('src/my_node_modules_file.js') is True
80+
81+
def test_case_sensitivity(self) -> None:
82+
"""Test that directory matching is case-sensitive."""
83+
84+
# Excluded directory names are lowercase and matching is case-sensitive, so uppercase or mixed-case variants should be included
85+
assert _is_file_relevant_for_sca_scan('project/NODE_MODULES/package.json') is True
86+
assert _is_file_relevant_for_sca_scan('project/Node_Modules/lib.js') is True
87+
assert _is_file_relevant_for_sca_scan('project/VENV/lib/module.py') is True
88+
89+
# But exact case matches should be excluded
90+
assert _is_file_relevant_for_sca_scan('project/node_modules/package.json') is False
91+
assert _is_file_relevant_for_sca_scan('project/venv/lib/module.py') is False
92+
93+
def test_nested_excluded_directories(self) -> None:
94+
"""Test deeply nested directory structures with excluded directories."""
95+
96+
# Deep nesting should still work
97+
deep_path = 'a/b/c/d/e/f/g/node_modules/h/i/j/package.json'
98+
assert _is_file_relevant_for_sca_scan(deep_path) is False
99+
100+
# Multiple levels of excluded directories
101+
multi_excluded = 'project/node_modules/package/venv/lib/__pycache__/module.pyc'
102+
assert _is_file_relevant_for_sca_scan(multi_excluded) is False
103+
104+
@pytest.mark.parametrize('excluded_dir', consts.SCA_EXCLUDED_FOLDER_IN_PATH)
105+
def test_parametrized_excluded_directories(self, excluded_dir: str) -> None:
106+
"""Parametrized test to ensure all excluded directories work correctly."""
107+
108+
# File inside excluded directory should be excluded
109+
excluded_path = f'project/{excluded_dir}/file.txt'
110+
assert _is_file_relevant_for_sca_scan(excluded_path) is False
111+
112+
# File with excluded directory name in filename should be included
113+
included_path = f'project/src/{excluded_dir}_config.txt'
114+
assert _is_file_relevant_for_sca_scan(included_path) is True

0 commit comments

Comments
 (0)