Skip to content

Commit c861b40

Browse files
authored
CM-48095 - Add new files filter for SAST (#311)
1 parent c81407b commit c861b40

File tree

12 files changed

+236
-138
lines changed

12 files changed

+236
-138
lines changed

cycode/cli/apps/scan/code_scanner.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from cycode.cli.console import console
1616
from cycode.cli.exceptions import custom_exceptions
1717
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
18-
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
18+
from cycode.cli.files_collector.excluder import excluder
1919
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
2020
from cycode.cli.files_collector.path_documents import get_relevant_documents
2121
from cycode.cli.files_collector.repository_documents import (
@@ -56,8 +56,8 @@ def scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
5656
progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES,
5757
repo_path=repo_path,
5858
)
59-
git_head_documents = exclude_irrelevant_documents_to_scan(scan_type, git_head_documents)
60-
pre_committed_documents = exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents)
59+
git_head_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, git_head_documents)
60+
pre_committed_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents)
6161
sca_code_scanner.perform_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents)
6262
scan_commit_range_documents(
6363
ctx,
@@ -77,8 +77,8 @@ def scan_sca_commit_range(ctx: typer.Context, path: str, commit_range: str) -> N
7777
from_commit_documents, to_commit_documents = get_commit_range_modified_documents(
7878
progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, path, from_commit_rev, to_commit_rev
7979
)
80-
from_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents)
81-
to_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents)
80+
from_commit_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents)
81+
to_commit_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents)
8282
sca_code_scanner.perform_pre_commit_range_scan_actions(
8383
path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev
8484
)
@@ -288,7 +288,7 @@ def scan_commit_range(
288288
{'path': path, 'commit_range': commit_range, 'commit_id': commit_id},
289289
)
290290

291-
documents_to_scan.extend(exclude_irrelevant_documents_to_scan(scan_type, commit_documents_to_scan))
291+
documents_to_scan.extend(excluder.exclude_irrelevant_documents_to_scan(scan_type, commit_documents_to_scan))
292292

293293
logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan})
294294
logger.debug('Starting to scan commit range (it may take a few minutes)')

cycode/cli/apps/scan/pre_commit/pre_commit_command.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from cycode.cli import consts
77
from cycode.cli.apps.scan.code_scanner import get_scan_parameters, scan_documents, scan_sca_pre_commit
8-
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
8+
from cycode.cli.files_collector.excluder import excluder
99
from cycode.cli.files_collector.repository_documents import (
1010
get_diff_file_content,
1111
get_diff_file_path,
@@ -45,5 +45,5 @@ def pre_commit_command(
4545
progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES)
4646
documents_to_scan.append(Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file)))
4747

48-
documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
48+
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
4949
scan_documents(ctx, documents_to_scan, get_scan_parameters(ctx), is_git_diff=True)

cycode/cli/apps/scan/repository/repository_command.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from cycode.cli import consts
88
from cycode.cli.apps.scan.code_scanner import get_scan_parameters, scan_documents
99
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
10-
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
10+
from cycode.cli.files_collector.excluder import excluder
1111
from cycode.cli.files_collector.repository_documents import get_git_repository_tree_file_entries
1212
from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions
1313
from cycode.cli.logger import logger
@@ -57,7 +57,7 @@ def repository_command(
5757
)
5858
)
5959

60-
documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
60+
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
6161

6262
perform_pre_scan_documents_actions(ctx, scan_type, documents_to_scan)
6363

cycode/cli/apps/scan/scan_command.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
ISSUE_DETECTED_STATUS_CODE,
1010
NO_ISSUES_STATUS_CODE,
1111
)
12+
from cycode.cli.files_collector.excluder import excluder
1213
from cycode.cli.utils import scan_utils
1314
from cycode.cli.utils.get_api_client import get_scan_cycode_client
1415
from cycode.cli.utils.sentry import add_breadcrumb
@@ -138,13 +139,19 @@ def scan_command(
138139

139140
ctx.obj['show_secret'] = show_secret
140141
ctx.obj['soft_fail'] = soft_fail
141-
ctx.obj['client'] = get_scan_cycode_client(ctx)
142142
ctx.obj['scan_type'] = scan_type
143143
ctx.obj['sync'] = sync
144144
ctx.obj['severity_threshold'] = severity_threshold
145145
ctx.obj['monitor'] = monitor
146146
ctx.obj['report'] = report
147147

148+
scan_client = get_scan_cycode_client(ctx)
149+
ctx.obj['client'] = scan_client
150+
151+
remote_scan_config = scan_client.get_scan_configuration_safe(scan_type)
152+
if remote_scan_config:
153+
excluder.apply_scan_config(str(scan_type), remote_scan_config)
154+
148155
if export_type and export_file:
149156
console_printer = ctx.obj['console_printer']
150157
console_printer.enable_recording(export_type, export_file)

cycode/cli/files_collector/excluder.py

Lines changed: 124 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -10,36 +10,12 @@
1010
if TYPE_CHECKING:
1111
from cycode.cli.models import Document
1212
from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection
13+
from cycode.cyclient import models
1314

1415

1516
logger = get_logger('File Excluder')
1617

1718

18-
def exclude_irrelevant_files(
19-
progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, filenames: list[str]
20-
) -> list[str]:
21-
relevant_files = []
22-
for filename in filenames:
23-
progress_bar.update(progress_bar_section)
24-
if _is_relevant_file_to_scan(scan_type, filename):
25-
relevant_files.append(filename)
26-
27-
is_sub_path.cache_clear() # free up memory
28-
29-
return relevant_files
30-
31-
32-
def exclude_irrelevant_documents_to_scan(scan_type: str, documents_to_scan: list['Document']) -> list['Document']:
33-
logger.debug('Excluding irrelevant documents to scan')
34-
35-
relevant_documents = []
36-
for document in documents_to_scan:
37-
if _is_relevant_document_to_scan(scan_type, document.path, document.content):
38-
relevant_documents.append(document)
39-
40-
return relevant_documents
41-
42-
4319
def _is_subpath_of_cycode_configuration_folder(filename: str) -> bool:
4420
return (
4521
is_sub_path(configuration_manager.global_config_file_manager.get_config_directory_path(), filename)
@@ -63,43 +39,6 @@ def _does_document_exceed_max_size_limit(content: str) -> bool:
6339
return get_content_size(content) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES
6440

6541

66-
def _is_relevant_file_to_scan(scan_type: str, filename: str) -> bool:
67-
if _is_subpath_of_cycode_configuration_folder(filename):
68-
logger.debug(
69-
'The file is irrelevant because it is in the Cycode configuration directory, %s',
70-
{'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
71-
)
72-
return False
73-
74-
if _is_path_configured_in_exclusions(scan_type, filename):
75-
logger.debug('The file is irrelevant because its path is in the ignore paths list, %s', {'filename': filename})
76-
return False
77-
78-
if not _is_file_extension_supported(scan_type, filename):
79-
logger.debug(
80-
'The file is irrelevant because its extension is not supported, %s',
81-
{'scan_type': scan_type, 'filename': filename},
82-
)
83-
return False
84-
85-
if is_binary_file(filename):
86-
logger.debug('The file is irrelevant because it is a binary file, %s', {'filename': filename})
87-
return False
88-
89-
if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename):
90-
logger.debug(
91-
'The file is irrelevant because it has exceeded the maximum size limit, %s',
92-
{
93-
'max_file_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
94-
'file_size': get_file_size(filename),
95-
'filename': filename,
96-
},
97-
)
98-
return False
99-
100-
return not (scan_type == consts.SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan(filename))
101-
102-
10342
def _is_file_relevant_for_sca_scan(filename: str) -> bool:
10443
if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS):
10544
logger.debug(
@@ -110,52 +49,126 @@ def _is_file_relevant_for_sca_scan(filename: str) -> bool:
11049
return True
11150

11251

113-
def _is_relevant_document_to_scan(scan_type: str, filename: str, content: str) -> bool:
114-
if _is_subpath_of_cycode_configuration_folder(filename):
115-
logger.debug(
116-
'The document is irrelevant because it is in the Cycode configuration directory, %s',
117-
{'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
118-
)
119-
return False
120-
121-
if _is_path_configured_in_exclusions(scan_type, filename):
122-
logger.debug(
123-
'The document is irrelevant because its path is in the ignore paths list, %s', {'filename': filename}
124-
)
125-
return False
126-
127-
if not _is_file_extension_supported(scan_type, filename):
128-
logger.debug(
129-
'The document is irrelevant because its extension is not supported, %s',
130-
{'scan_type': scan_type, 'filename': filename},
131-
)
132-
return False
133-
134-
if is_binary_content(content):
135-
logger.debug('The document is irrelevant because it is a binary file, %s', {'filename': filename})
136-
return False
137-
138-
if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content):
139-
logger.debug(
140-
'The document is irrelevant because it has exceeded the maximum size limit, %s',
141-
{
142-
'max_document_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
143-
'document_size': get_content_size(content),
144-
'filename': filename,
145-
},
146-
)
147-
return False
148-
149-
return True
150-
151-
152-
def _is_file_extension_supported(scan_type: str, filename: str) -> bool:
153-
filename = filename.lower()
154-
155-
if scan_type == consts.IAC_SCAN_TYPE:
156-
return filename.endswith(consts.IAC_SCAN_SUPPORTED_FILES)
157-
158-
if scan_type == consts.SCA_SCAN_TYPE:
159-
return filename.endswith(consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES)
160-
161-
return not filename.endswith(consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE)
52+
class Excluder:
    """Decide which files and in-memory documents are relevant for a scan.

    Extension rules are kept per scan type in two tables:

    * ``_scannable_extensions`` - allow-list: only filenames ending with one of
      the listed suffixes are accepted.
    * ``_non_scannable_extensions`` - deny-list: filenames ending with one of
      the listed suffixes are rejected.

    The allow-list of a scan type can be replaced at runtime through
    ``apply_scan_config``.
    """

    def __init__(self) -> None:
        self._scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.IAC_SCAN_TYPE: consts.IAC_SCAN_SUPPORTED_FILES,
            consts.SCA_SCAN_TYPE: consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES,
        }
        self._non_scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.SECRET_SCAN_TYPE: consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE,
        }

    def apply_scan_config(self, scan_type: str, scan_config: 'models.ScanConfiguration') -> None:
        """Replace the allow-list of ``scan_type`` with the one from ``scan_config``.

        Nothing changes when ``scan_config.scannable_extensions`` is empty or ``None``.
        """
        if scan_config.scannable_extensions:
            # Filenames are lower-cased before matching (see _is_file_extension_supported),
            # so the configured suffixes must be lower-cased too or mixed-case entries
            # such as '.PY' would never match anything.
            self._scannable_extensions[scan_type] = tuple(
                extension.lower() for extension in scan_config.scannable_extensions
            )

    def _is_file_extension_supported(self, scan_type: str, filename: str) -> bool:
        """Return whether ``filename`` passes the extension rules of ``scan_type``.

        The match is case-insensitive on the filename. A non-empty allow-list takes
        precedence over the deny-list; a scan type with neither accepts every filename.
        """
        filename = filename.lower()

        scannable_extensions = self._scannable_extensions.get(scan_type)
        if scannable_extensions:
            return filename.endswith(scannable_extensions)

        non_scannable_extensions = self._non_scannable_extensions.get(scan_type)
        if non_scannable_extensions:
            return not filename.endswith(non_scannable_extensions)

        return True

    def _is_relevant_file_to_scan_common(self, scan_type: str, filename: str) -> bool:
        """Run the path-based checks shared by on-disk files and in-memory documents.

        Rejects paths inside the Cycode configuration directory, paths configured in
        the exclusions, and paths whose extension is not supported for ``scan_type``.
        Each rejection is logged at debug level.
        """
        if _is_subpath_of_cycode_configuration_folder(filename):
            logger.debug(
                'The document is irrelevant because it is in the Cycode configuration directory, %s',
                {'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
            )
            return False

        if _is_path_configured_in_exclusions(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its path is in the ignore paths list, %s', {'filename': filename}
            )
            return False

        if not self._is_file_extension_supported(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its extension is not supported, %s',
                {'scan_type': scan_type, 'filename': filename},
            )
            return False

        return True

    def _is_relevant_file_to_scan(self, scan_type: str, filename: str) -> bool:
        """Return whether the on-disk file ``filename`` should be scanned.

        On top of the common path checks, binary files are rejected, the size limit
        is enforced for every scan type except SCA, and SCA scans additionally apply
        ``_is_file_relevant_for_sca_scan``.
        """
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_file(filename):
            logger.debug('The file is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        is_sca_scan = scan_type == consts.SCA_SCAN_TYPE

        if not is_sca_scan and _does_file_exceed_max_size_limit(filename):
            logger.debug(
                'The file is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_file_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'file_size': get_file_size(filename),
                    'filename': filename,
                },
            )
            return False

        # Only SCA scans consult the SCA-specific path filter.
        return not is_sca_scan or _is_file_relevant_for_sca_scan(filename)

    def _is_relevant_document_to_scan(self, scan_type: str, filename: str, content: str) -> bool:
        """Return whether an in-memory document should be scanned.

        Same as ``_is_relevant_file_to_scan`` but the binary and size checks run on
        ``content`` instead of reading the file, and no SCA-specific path filter is
        applied.
        """
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_content(content):
            logger.debug('The document is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content):
            logger.debug(
                'The document is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_document_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'document_size': get_content_size(content),
                    'filename': filename,
                },
            )
            return False

        return True

    def exclude_irrelevant_files(
        self,
        progress_bar: 'BaseProgressBar',
        progress_bar_section: 'ProgressBarSection',
        scan_type: str,
        filenames: list[str],
    ) -> list[str]:
        """Return the entries of ``filenames`` that are relevant for ``scan_type``.

        The progress bar is advanced once per inspected filename, whether or not the
        file is kept. Input order is preserved.
        """
        relevant_files = []
        for filename in filenames:
            progress_bar.update(progress_bar_section)
            if self._is_relevant_file_to_scan(scan_type, filename):
                relevant_files.append(filename)

        is_sub_path.cache_clear()  # free up memory

        return relevant_files

    def exclude_irrelevant_documents_to_scan(
        self, scan_type: str, documents_to_scan: list['Document']
    ) -> list['Document']:
        """Return the documents that are relevant for ``scan_type``, in input order."""
        logger.debug('Excluding irrelevant documents to scan')

        return [
            document
            for document in documents_to_scan
            if self._is_relevant_document_to_scan(scan_type, document.path, document.content)
        ]
172+
173+
174+
# Shared module-level instance: the scan commands import this object and call its
# methods, and scan_command applies the fetched scan configuration to it.
excluder = Excluder()

0 commit comments

Comments
 (0)