1010if TYPE_CHECKING :
1111 from cycode .cli .models import Document
1212 from cycode .cli .utils .progress_bar import BaseProgressBar , ProgressBarSection
13+ from cycode .cyclient import models
1314
1415
# Module-level logger, obtained from get_logger under the name 'File Excluder'.
1516logger = get_logger ('File Excluder' )
1617
1718
18- def exclude_irrelevant_files (
19- progress_bar : 'BaseProgressBar' , progress_bar_section : 'ProgressBarSection' , scan_type : str , filenames : list [str ]
20- ) -> list [str ]:
21- relevant_files = []
22- for filename in filenames :
23- progress_bar .update (progress_bar_section )
24- if _is_relevant_file_to_scan (scan_type , filename ):
25- relevant_files .append (filename )
26-
27- is_sub_path .cache_clear () # free up memory
28-
29- return relevant_files
30-
31-
32- def exclude_irrelevant_documents_to_scan (scan_type : str , documents_to_scan : list ['Document' ]) -> list ['Document' ]:
33- logger .debug ('Excluding irrelevant documents to scan' )
34-
35- relevant_documents = []
36- for document in documents_to_scan :
37- if _is_relevant_document_to_scan (scan_type , document .path , document .content ):
38- relevant_documents .append (document )
39-
40- return relevant_documents
41-
42-
4319def _is_subpath_of_cycode_configuration_folder (filename : str ) -> bool :
4420 return (
4521 is_sub_path (configuration_manager .global_config_file_manager .get_config_directory_path (), filename )
@@ -63,43 +39,6 @@ def _does_document_exceed_max_size_limit(content: str) -> bool:
6339 return get_content_size (content ) > consts .FILE_MAX_SIZE_LIMIT_IN_BYTES
6440
6541
66- def _is_relevant_file_to_scan (scan_type : str , filename : str ) -> bool :
67- if _is_subpath_of_cycode_configuration_folder (filename ):
68- logger .debug (
69- 'The file is irrelevant because it is in the Cycode configuration directory, %s' ,
70- {'filename' : filename , 'configuration_directory' : consts .CYCODE_CONFIGURATION_DIRECTORY },
71- )
72- return False
73-
74- if _is_path_configured_in_exclusions (scan_type , filename ):
75- logger .debug ('The file is irrelevant because its path is in the ignore paths list, %s' , {'filename' : filename })
76- return False
77-
78- if not _is_file_extension_supported (scan_type , filename ):
79- logger .debug (
80- 'The file is irrelevant because its extension is not supported, %s' ,
81- {'scan_type' : scan_type , 'filename' : filename },
82- )
83- return False
84-
85- if is_binary_file (filename ):
86- logger .debug ('The file is irrelevant because it is a binary file, %s' , {'filename' : filename })
87- return False
88-
89- if scan_type != consts .SCA_SCAN_TYPE and _does_file_exceed_max_size_limit (filename ):
90- logger .debug (
91- 'The file is irrelevant because it has exceeded the maximum size limit, %s' ,
92- {
93- 'max_file_size' : consts .FILE_MAX_SIZE_LIMIT_IN_BYTES ,
94- 'file_size' : get_file_size (filename ),
95- 'filename' : filename ,
96- },
97- )
98- return False
99-
100- return not (scan_type == consts .SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan (filename ))
101-
102-
10342def _is_file_relevant_for_sca_scan (filename : str ) -> bool :
10443 if any (sca_excluded_path in filename for sca_excluded_path in consts .SCA_EXCLUDED_PATHS ):
10544 logger .debug (
@@ -110,52 +49,126 @@ def _is_file_relevant_for_sca_scan(filename: str) -> bool:
11049 return True
11150
11251
113- def _is_relevant_document_to_scan (scan_type : str , filename : str , content : str ) -> bool :
114- if _is_subpath_of_cycode_configuration_folder (filename ):
115- logger .debug (
116- 'The document is irrelevant because it is in the Cycode configuration directory, %s' ,
117- {'filename' : filename , 'configuration_directory' : consts .CYCODE_CONFIGURATION_DIRECTORY },
118- )
119- return False
120-
121- if _is_path_configured_in_exclusions (scan_type , filename ):
122- logger .debug (
123- 'The document is irrelevant because its path is in the ignore paths list, %s' , {'filename' : filename }
124- )
125- return False
126-
127- if not _is_file_extension_supported (scan_type , filename ):
128- logger .debug (
129- 'The document is irrelevant because its extension is not supported, %s' ,
130- {'scan_type' : scan_type , 'filename' : filename },
131- )
132- return False
133-
134- if is_binary_content (content ):
135- logger .debug ('The document is irrelevant because it is a binary file, %s' , {'filename' : filename })
136- return False
137-
138- if scan_type != consts .SCA_SCAN_TYPE and _does_document_exceed_max_size_limit (content ):
139- logger .debug (
140- 'The document is irrelevant because it has exceeded the maximum size limit, %s' ,
141- {
142- 'max_document_size' : consts .FILE_MAX_SIZE_LIMIT_IN_BYTES ,
143- 'document_size' : get_content_size (content ),
144- 'filename' : filename ,
145- },
146- )
147- return False
148-
149- return True
150-
151-
152- def _is_file_extension_supported (scan_type : str , filename : str ) -> bool :
153- filename = filename .lower ()
154-
155- if scan_type == consts .IAC_SCAN_TYPE :
156- return filename .endswith (consts .IAC_SCAN_SUPPORTED_FILES )
157-
158- if scan_type == consts .SCA_SCAN_TYPE :
159- return filename .endswith (consts .SCA_CONFIGURATION_SCAN_SUPPORTED_FILES )
160-
161- return not filename .endswith (consts .SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE )
class Excluder:
    """Decide which files and documents are relevant for a given scan type.

    Two per-scan-type extension tables are kept:

    * ``_scannable_extensions`` - allow-list; when a scan type has a non-empty
      entry here, only filenames ending with one of these suffixes pass.
    * ``_non_scannable_extensions`` - deny-list; consulted only when the scan
      type has no allow-list entry.

    The allow-list can be overridden per scan type via ``apply_scan_config``.
    """

    def __init__(self) -> None:
        self._scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.IAC_SCAN_TYPE: consts.IAC_SCAN_SUPPORTED_FILES,
            consts.SCA_SCAN_TYPE: consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES,
        }
        self._non_scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.SECRET_SCAN_TYPE: consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE,
        }

    def apply_scan_config(self, scan_type: str, scan_config: 'models.ScanConfiguration') -> None:
        """Override the allow-list of ``scan_type`` with the one from ``scan_config``.

        Nothing changes when ``scan_config.scannable_extensions`` is empty or None.
        """
        if scan_config.scannable_extensions:
            # Filenames are lower-cased before suffix matching, so the configured
            # extensions must be lower-cased as well; otherwise an entry such as
            # '.TF' could never match and every file would be excluded.
            self._scannable_extensions[scan_type] = tuple(
                extension.lower() for extension in scan_config.scannable_extensions
            )

    def _is_file_extension_supported(self, scan_type: str, filename: str) -> bool:
        """Return True when the suffix of ``filename`` is acceptable for ``scan_type``.

        The comparison is done on the lower-cased filename. The allow-list wins
        over the deny-list; with neither configured, every file is accepted.
        """
        filename = filename.lower()

        scannable_extensions = self._scannable_extensions.get(scan_type)
        if scannable_extensions:
            return filename.endswith(scannable_extensions)

        non_scannable_extensions = self._non_scannable_extensions.get(scan_type)
        if non_scannable_extensions:
            return not filename.endswith(non_scannable_extensions)

        return True

    def _is_relevant_file_to_scan_common(self, scan_type: str, filename: str) -> bool:
        """Run the path-based checks shared by on-disk files and in-memory documents.

        Returns False (after a debug log) when the path is inside the Cycode
        configuration folder, is configured in the exclusions, or has an
        unsupported extension.
        """
        if _is_subpath_of_cycode_configuration_folder(filename):
            logger.debug(
                'The document is irrelevant because it is in the Cycode configuration directory, %s',
                {'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
            )
            return False

        if _is_path_configured_in_exclusions(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its path is in the ignore paths list, %s', {'filename': filename}
            )
            return False

        if not self._is_file_extension_supported(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its extension is not supported, %s',
                {'scan_type': scan_type, 'filename': filename},
            )
            return False

        return True

    def _is_relevant_file_to_scan(self, scan_type: str, filename: str) -> bool:
        """Return True when the file at ``filename`` should be scanned.

        On top of the common path checks, the file is rejected when it is
        binary, when it exceeds the size limit (not applied to SCA scans), or
        when an SCA scan considers it irrelevant.
        """
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_file(filename):
            logger.debug('The file is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename):
            logger.debug(
                'The file is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_file_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'file_size': get_file_size(filename),
                    'filename': filename,
                },
            )
            return False

        # The SCA-specific relevance check only applies to SCA scans.
        return scan_type != consts.SCA_SCAN_TYPE or _is_file_relevant_for_sca_scan(filename)

    def _is_relevant_document_to_scan(self, scan_type: str, filename: str, content: str) -> bool:
        """Return True when an in-memory document should be scanned.

        On top of the common path checks, the document is rejected when its
        content is binary or exceeds the size limit (not applied to SCA scans).
        """
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_content(content):
            logger.debug('The document is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content):
            logger.debug(
                'The document is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_document_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'document_size': get_content_size(content),
                    'filename': filename,
                },
            )
            return False

        return True

    def exclude_irrelevant_files(
        self,
        progress_bar: 'BaseProgressBar',
        progress_bar_section: 'ProgressBarSection',
        scan_type: str,
        filenames: list[str],
    ) -> list[str]:
        """Return the subset of ``filenames`` that is relevant for ``scan_type``.

        The progress bar is updated once per input filename, whether or not the
        file is kept.
        """
        relevant_files = []
        for filename in filenames:
            progress_bar.update(progress_bar_section)
            if self._is_relevant_file_to_scan(scan_type, filename):
                relevant_files.append(filename)

        is_sub_path.cache_clear()  # free up memory

        return relevant_files

    def exclude_irrelevant_documents_to_scan(
        self, scan_type: str, documents_to_scan: list['Document']
    ) -> list['Document']:
        """Return the documents from ``documents_to_scan`` relevant for ``scan_type``."""
        logger.debug('Excluding irrelevant documents to scan')

        return [
            document
            for document in documents_to_scan
            if self._is_relevant_document_to_scan(scan_type, document.path, document.content)
        ]
# Module-level Excluder instance, constructed when this module is imported.
174+ excluder = Excluder ()
0 commit comments