10
10
if TYPE_CHECKING :
11
11
from cycode .cli .models import Document
12
12
from cycode .cli .utils .progress_bar import BaseProgressBar , ProgressBarSection
13
+ from cycode .cyclient import models
13
14
14
15
15
16
logger = get_logger ('File Excluder' )
16
17
17
18
18
- def exclude_irrelevant_files (
19
- progress_bar : 'BaseProgressBar' , progress_bar_section : 'ProgressBarSection' , scan_type : str , filenames : list [str ]
20
- ) -> list [str ]:
21
- relevant_files = []
22
- for filename in filenames :
23
- progress_bar .update (progress_bar_section )
24
- if _is_relevant_file_to_scan (scan_type , filename ):
25
- relevant_files .append (filename )
26
-
27
- is_sub_path .cache_clear () # free up memory
28
-
29
- return relevant_files
30
-
31
-
32
- def exclude_irrelevant_documents_to_scan (scan_type : str , documents_to_scan : list ['Document' ]) -> list ['Document' ]:
33
- logger .debug ('Excluding irrelevant documents to scan' )
34
-
35
- relevant_documents = []
36
- for document in documents_to_scan :
37
- if _is_relevant_document_to_scan (scan_type , document .path , document .content ):
38
- relevant_documents .append (document )
39
-
40
- return relevant_documents
41
-
42
-
43
19
def _is_subpath_of_cycode_configuration_folder (filename : str ) -> bool :
44
20
return (
45
21
is_sub_path (configuration_manager .global_config_file_manager .get_config_directory_path (), filename )
@@ -63,43 +39,6 @@ def _does_document_exceed_max_size_limit(content: str) -> bool:
63
39
return get_content_size (content ) > consts .FILE_MAX_SIZE_LIMIT_IN_BYTES
64
40
65
41
66
- def _is_relevant_file_to_scan (scan_type : str , filename : str ) -> bool :
67
- if _is_subpath_of_cycode_configuration_folder (filename ):
68
- logger .debug (
69
- 'The file is irrelevant because it is in the Cycode configuration directory, %s' ,
70
- {'filename' : filename , 'configuration_directory' : consts .CYCODE_CONFIGURATION_DIRECTORY },
71
- )
72
- return False
73
-
74
- if _is_path_configured_in_exclusions (scan_type , filename ):
75
- logger .debug ('The file is irrelevant because its path is in the ignore paths list, %s' , {'filename' : filename })
76
- return False
77
-
78
- if not _is_file_extension_supported (scan_type , filename ):
79
- logger .debug (
80
- 'The file is irrelevant because its extension is not supported, %s' ,
81
- {'scan_type' : scan_type , 'filename' : filename },
82
- )
83
- return False
84
-
85
- if is_binary_file (filename ):
86
- logger .debug ('The file is irrelevant because it is a binary file, %s' , {'filename' : filename })
87
- return False
88
-
89
- if scan_type != consts .SCA_SCAN_TYPE and _does_file_exceed_max_size_limit (filename ):
90
- logger .debug (
91
- 'The file is irrelevant because it has exceeded the maximum size limit, %s' ,
92
- {
93
- 'max_file_size' : consts .FILE_MAX_SIZE_LIMIT_IN_BYTES ,
94
- 'file_size' : get_file_size (filename ),
95
- 'filename' : filename ,
96
- },
97
- )
98
- return False
99
-
100
- return not (scan_type == consts .SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan (filename ))
101
-
102
-
103
42
def _is_file_relevant_for_sca_scan (filename : str ) -> bool :
104
43
if any (sca_excluded_path in filename for sca_excluded_path in consts .SCA_EXCLUDED_PATHS ):
105
44
logger .debug (
@@ -110,52 +49,126 @@ def _is_file_relevant_for_sca_scan(filename: str) -> bool:
110
49
return True
111
50
112
51
113
- def _is_relevant_document_to_scan (scan_type : str , filename : str , content : str ) -> bool :
114
- if _is_subpath_of_cycode_configuration_folder (filename ):
115
- logger .debug (
116
- 'The document is irrelevant because it is in the Cycode configuration directory, %s' ,
117
- {'filename' : filename , 'configuration_directory' : consts .CYCODE_CONFIGURATION_DIRECTORY },
118
- )
119
- return False
120
-
121
- if _is_path_configured_in_exclusions (scan_type , filename ):
122
- logger .debug (
123
- 'The document is irrelevant because its path is in the ignore paths list, %s' , {'filename' : filename }
124
- )
125
- return False
126
-
127
- if not _is_file_extension_supported (scan_type , filename ):
128
- logger .debug (
129
- 'The document is irrelevant because its extension is not supported, %s' ,
130
- {'scan_type' : scan_type , 'filename' : filename },
131
- )
132
- return False
133
-
134
- if is_binary_content (content ):
135
- logger .debug ('The document is irrelevant because it is a binary file, %s' , {'filename' : filename })
136
- return False
137
-
138
- if scan_type != consts .SCA_SCAN_TYPE and _does_document_exceed_max_size_limit (content ):
139
- logger .debug (
140
- 'The document is irrelevant because it has exceeded the maximum size limit, %s' ,
141
- {
142
- 'max_document_size' : consts .FILE_MAX_SIZE_LIMIT_IN_BYTES ,
143
- 'document_size' : get_content_size (content ),
144
- 'filename' : filename ,
145
- },
146
- )
147
- return False
148
-
149
- return True
150
-
151
-
152
- def _is_file_extension_supported (scan_type : str , filename : str ) -> bool :
153
- filename = filename .lower ()
154
-
155
- if scan_type == consts .IAC_SCAN_TYPE :
156
- return filename .endswith (consts .IAC_SCAN_SUPPORTED_FILES )
157
-
158
- if scan_type == consts .SCA_SCAN_TYPE :
159
- return filename .endswith (consts .SCA_CONFIGURATION_SCAN_SUPPORTED_FILES )
160
-
161
- return not filename .endswith (consts .SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE )
52
class Excluder:
    """Filter out files and documents that should not be sent to a scan.

    The decision is based on, in order: the Cycode configuration directory,
    the configured path exclusions, the filename suffix rules kept on this
    instance, binary detection, the size limit (skipped for SCA scans) and,
    for SCA file scans only, the SCA-specific path rules.
    """

    def __init__(self) -> None:
        # Allow-lists: when a scan type has a non-empty entry here, a filename is
        # supported only if it ends with one of these suffixes.
        self._scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.IAC_SCAN_TYPE: consts.IAC_SCAN_SUPPORTED_FILES,
            consts.SCA_SCAN_TYPE: consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES,
        }
        # Deny-lists: consulted only when the scan type has no allow-list.
        self._non_scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.SECRET_SCAN_TYPE: consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE,
        }

    def apply_scan_config(self, scan_type: str, scan_config: 'models.ScanConfiguration') -> None:
        """Override the allow-list of ``scan_type`` with the one from ``scan_config``.

        Nothing changes when ``scan_config.scannable_extensions`` is empty or None.
        """
        extensions = scan_config.scannable_extensions
        if not extensions:
            return

        # Filenames are lowercased before suffix matching, so the stored suffixes must be
        # lowercase as well; otherwise a suffix containing an uppercase character never matches.
        self._scannable_extensions[scan_type] = tuple(extension.lower() for extension in extensions)

    def _is_file_extension_supported(self, scan_type: str, filename: str) -> bool:
        """Return True when the filename suffix is acceptable for ``scan_type``.

        The comparison is done on the lowercased filename. An allow-list, when present,
        takes precedence over the deny-list; with neither, every filename is supported.
        """
        lowered_filename = filename.lower()

        allowed_suffixes = self._scannable_extensions.get(scan_type)
        if allowed_suffixes:
            return lowered_filename.endswith(allowed_suffixes)

        ignored_suffixes = self._non_scannable_extensions.get(scan_type)
        if ignored_suffixes:
            return not lowered_filename.endswith(ignored_suffixes)

        return True

    def _is_relevant_file_to_scan_common(self, scan_type: str, filename: str) -> bool:
        """Run the path-based checks shared by file and document filtering.

        Returns False (after logging the reason at debug level) when the path is inside the
        Cycode configuration directory, is configured as excluded, or has an unsupported suffix.
        """
        if _is_subpath_of_cycode_configuration_folder(filename):
            logger.debug(
                'The document is irrelevant because it is in the Cycode configuration directory, %s',
                {'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
            )
            return False

        if _is_path_configured_in_exclusions(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its path is in the ignore paths list, %s', {'filename': filename}
            )
            return False

        if not self._is_file_extension_supported(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its extension is not supported, %s',
                {'scan_type': scan_type, 'filename': filename},
            )
            return False

        return True

    def _is_relevant_file_to_scan(self, scan_type: str, filename: str) -> bool:
        """Return True when the file at ``filename`` should be scanned for ``scan_type``."""
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_file(filename):
            logger.debug('The file is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        # The size limit is not applied to SCA scans.
        if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename):
            logger.debug(
                'The file is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_file_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'file_size': get_file_size(filename),
                    'filename': filename,
                },
            )
            return False

        # Only SCA scans apply the extra SCA path rules; every other scan type passes here.
        return scan_type != consts.SCA_SCAN_TYPE or _is_file_relevant_for_sca_scan(filename)

    def _is_relevant_document_to_scan(self, scan_type: str, filename: str, content: str) -> bool:
        """Return True when an in-memory document should be scanned for ``scan_type``.

        Same checks as for files, but binary detection and the size limit are evaluated on
        ``content``, and the SCA-specific path rules are not applied.
        """
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_content(content):
            logger.debug('The document is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        # The size limit is not applied to SCA scans.
        if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content):
            logger.debug(
                'The document is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_document_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'document_size': get_content_size(content),
                    'filename': filename,
                },
            )
            return False

        return True

    def exclude_irrelevant_files(
        self,
        progress_bar: 'BaseProgressBar',
        progress_bar_section: 'ProgressBarSection',
        scan_type: str,
        filenames: list[str],
    ) -> list[str]:
        """Return the subset of ``filenames`` relevant for ``scan_type``, in input order.

        The progress bar is advanced once per filename, whether or not the file is kept.
        """
        relevant_files = []
        for filename in filenames:
            # Kept as an explicit loop: the progress update must happen before each check.
            progress_bar.update(progress_bar_section)
            if self._is_relevant_file_to_scan(scan_type, filename):
                relevant_files.append(filename)

        is_sub_path.cache_clear()  # free up memory

        return relevant_files

    def exclude_irrelevant_documents_to_scan(
        self, scan_type: str, documents_to_scan: list['Document']
    ) -> list['Document']:
        """Return the documents from ``documents_to_scan`` relevant for ``scan_type``, in input order."""
        logger.debug('Excluding irrelevant documents to scan')

        return [
            document
            for document in documents_to_scan
            if self._is_relevant_document_to_scan(scan_type, document.path, document.content)
        ]
172
+
173
+
174
# Module-level Excluder instance, created when this module is imported.
excluder = Excluder()
0 commit comments