11import os
2- from collections import defaultdict
3- from typing import TYPE_CHECKING , Iterable , List , Set , Tuple
2+ from typing import TYPE_CHECKING , Iterable , List , Optional , Tuple
43
54import pathspec
65
1110 is_iac ,
1211 is_tfplan_file ,
1312)
13+ from cycode .cli .files_collector .walk_ignore import walk_ignore
1414from cycode .cli .models import Document
1515from cycode .cli .utils .path_utils import get_absolute_path , get_file_content
1616from cycode .cyclient import logger
1919 from cycode .cli .utils .progress_bar import BaseProgressBar , ProgressBarSection
2020
2121
def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> List[str]:
    """Collect the paths of all files found while walking ``path``.

    Args:
        path: Directory to walk.
        walk_with_ignore_patterns: When true (the default) the tree is
            traversed with ``walk_ignore``; otherwise plain ``os.walk`` is used.
            NOTE(review): ``walk_ignore`` is presumably an ``os.walk``-compatible
            generator that skips entries matched by ignore files - confirm
            against ``cycode.cli.files_collector.walk_ignore``.

    Returns:
        A list with ``os.path.join(root, filename)`` for every filename the
        selected walker yields, in the order the walker produces them.
    """
    # Both walkers are consumed identically: an iterable of (root, dirs, filenames) triples.
    walk_func = walk_ignore if walk_with_ignore_patterns else os.walk

    files: List[str] = []
    for root, _, filenames in walk_func(path):
        files.extend(os.path.join(root, filename) for filename in filenames)

    return files
9531
9632
97- def _get_relevant_files_in_path (path : str , exclude_patterns : Iterable [str ]) -> List [str ]:
33+ def _get_relevant_files_in_path (path : str , exclude_patterns : Optional [ Iterable [str ]] = None ) -> List [str ]:
9834 absolute_path = get_absolute_path (path )
9935
10036 if not os .path .isfile (absolute_path ) and not os .path .isdir (absolute_path ):
@@ -103,24 +39,21 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L
10339 if os .path .isfile (absolute_path ):
10440 return [absolute_path ]
10541
106- all_file_paths = _get_all_existing_files_in_directory (absolute_path )
107-
108- path_spec = pathspec .PathSpec .from_lines (pathspec .patterns .GitWildMatchPattern , exclude_patterns )
109- excluded_file_paths = set (path_spec .match_files (all_file_paths ))
42+ file_paths = _get_all_existing_files_in_directory (absolute_path )
11043
111- relevant_file_paths = all_file_paths - excluded_file_paths
44+ if exclude_patterns :
45+ path_spec = pathspec .PathSpec .from_lines (pathspec .patterns .GitWildMatchPattern , exclude_patterns )
46+ file_paths = path_spec .match_files (file_paths , negate = True )
11247
113- return [file_path for file_path in relevant_file_paths if os .path .isfile (file_path )]
48+ return [file_path for file_path in file_paths if os .path .isfile (file_path )]
11449
11550
11651def _get_relevant_files (
11752 progress_bar : 'BaseProgressBar' , progress_bar_section : 'ProgressBarSection' , scan_type : str , paths : Tuple [str ]
11853) -> List [str ]:
11954 all_files_to_scan = []
12055 for path in paths :
121- all_files_to_scan .extend (
122- _get_relevant_files_in_path (path = path , exclude_patterns = ['**/.git/**' , '**/.cycode/**' ])
123- )
56+ all_files_to_scan .extend (_get_relevant_files_in_path (path ))
12457
12558 # we double the progress bar section length because we are going to process the files twice
12659 # first time to get the file list with respect to excluded patterns (excluding takes seconds to execute)
0 commit comments