Skip to content

Commit aa534b3

Browse files
authored
CM-42771 - Add support of .gitignore files for a file excluding from scans (#272)
1 parent 2f2759b commit aa534b3

File tree

10 files changed

+887
-46
lines changed

10 files changed

+887
-46
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@ jobs:
5050
run: poetry install
5151

5252
- name: Run Tests
53-
run: poetry run pytest
53+
run: poetry run python -m pytest

.github/workflows/tests_full.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
uses: actions/cache@v3
5151
with:
5252
path: ~/.local
53-
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-1 # increment to reset cache
53+
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-2 # increment to reset cache
5454

5555
- name: Setup Poetry
5656
if: steps.cached-poetry.outputs.cache-hit != 'true'
@@ -71,4 +71,4 @@ jobs:
7171
./dist/cycode-cli version
7272
7373
- name: Run pytest
74-
run: poetry run pytest
74+
run: poetry run python -m pytest

cycode/cli/files_collector/path_documents.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import os
2-
from typing import TYPE_CHECKING, Iterable, List, Tuple
3-
4-
import pathspec
2+
from typing import TYPE_CHECKING, List, Tuple
53

64
from cycode.cli.files_collector.excluder import exclude_irrelevant_files
75
from cycode.cli.files_collector.iac.tf_content_generator import (
@@ -10,6 +8,7 @@
108
is_iac,
119
is_tfplan_file,
1210
)
11+
from cycode.cli.files_collector.walk_ignore import walk_ignore
1312
from cycode.cli.models import Document
1413
from cycode.cli.utils.path_utils import get_absolute_path, get_file_content
1514
from cycode.cyclient import logger
@@ -18,17 +17,18 @@
1817
from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection
1918

2019

21-
def _get_all_existing_files_in_directory(path: str) -> List[str]:
20+
def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> List[str]:
2221
files: List[str] = []
2322

24-
for root, _, filenames in os.walk(path):
23+
walk_func = walk_ignore if walk_with_ignore_patterns else os.walk
24+
for root, _, filenames in walk_func(path):
2525
for filename in filenames:
2626
files.append(os.path.join(root, filename))
2727

2828
return files
2929

3030

31-
def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]:
31+
def _get_relevant_files_in_path(path: str) -> List[str]:
3232
absolute_path = get_absolute_path(path)
3333

3434
if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path):
@@ -37,24 +37,16 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L
3737
if os.path.isfile(absolute_path):
3838
return [absolute_path]
3939

40-
all_file_paths = set(_get_all_existing_files_in_directory(absolute_path))
41-
42-
path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns)
43-
excluded_file_paths = set(path_spec.match_files(all_file_paths))
44-
45-
relevant_file_paths = all_file_paths - excluded_file_paths
46-
47-
return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)]
40+
file_paths = _get_all_existing_files_in_directory(absolute_path)
41+
return [file_path for file_path in file_paths if os.path.isfile(file_path)]
4842

4943

5044
def _get_relevant_files(
5145
progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, paths: Tuple[str]
5246
) -> List[str]:
5347
all_files_to_scan = []
5448
for path in paths:
55-
all_files_to_scan.extend(
56-
_get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**'])
57-
)
49+
all_files_to_scan.extend(_get_relevant_files_in_path(path))
5850

5951
# we double the progress bar section length because we are going to process the files twice
6052
# first time to get the file list with respect to excluded patterns (excluding takes seconds to execute)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import os
2+
from typing import Generator, Iterable, List, Tuple
3+
4+
from cycode.cli.utils.ignore_utils import IgnoreFilterManager
5+
from cycode.cyclient import logger
6+
7+
_SUPPORTED_IGNORE_PATTERN_FILES = { # one day we will bring .cycodeignore or something like that
8+
'.gitignore',
9+
}
10+
_DEFAULT_GLOBAL_IGNORE_PATTERNS = [
11+
'.git',
12+
'.cycode',
13+
]
14+
15+
16+
def _walk_to_top(path: str) -> Iterable[str]:
    """Yield ``path`` followed by each of its ancestor directories.

    Iteration stops once ``os.path.dirname`` no longer changes the path: at the
    filesystem root for an absolute path, or at the empty string for a relative
    one. The last non-empty value is yielded as well; an empty ``path`` yields
    nothing.

    A trailing separator is dropped before walking. ``os.path.dirname('a/b/')``
    is ``'a/b'``, which names the same directory, so without this step that
    directory would be yielded twice.
    """
    parent = os.path.dirname(path)
    if parent != path and not os.path.basename(path):
        # Empty basename with a differing dirname means `path` only ends with a separator.
        path = parent

    while True:
        parent = os.path.dirname(path)
        if parent == path:
            break
        yield path
        path = parent

    if path:
        yield path  # Include the top-level directory
23+
24+
25+
def _collect_top_level_ignore_files(path: str) -> List[str]:
    """Return paths of supported ignore files found in ``path`` and its ancestors.

    Every directory produced by ``_walk_to_top(path)`` is probed for each name
    in ``_SUPPORTED_IGNORE_PATTERN_FILES``. Results are ordered from ``path``
    itself up to the top-most directory, and each hit is logged at debug level.
    """
    ignore_files = []
    for dir_path in _walk_to_top(path):
        for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES:
            ignore_file_path = os.path.join(dir_path, ignore_file)
            # isfile rather than exists: a directory that merely carries an ignore
            # file's name cannot be read as a pattern file and must be skipped.
            if os.path.isfile(ignore_file_path):
                logger.debug('Apply top level ignore file: %s', ignore_file_path)
                ignore_files.append(ignore_file_path)
    return ignore_files
34+
35+
36+
def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]:
    """Walk ``path`` through an ``IgnoreFilterManager`` and yield its entries.

    The manager is built for ``path`` with two sources of global rules: the
    ignore files gathered by ``_collect_top_level_ignore_files(path)`` and the
    patterns in ``_DEFAULT_GLOBAL_IGNORE_PATTERNS``. Everything produced by the
    manager's ``walk()`` is re-yielded unchanged.
    """
    top_level_ignore_files = _collect_top_level_ignore_files(path)
    manager = IgnoreFilterManager.build(
        path=path,
        global_ignore_file_paths=top_level_ignore_files,
        global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS,
    )
    yield from manager.walk()

0 commit comments

Comments
 (0)