Skip to content

Commit 62b4fae

Browse files
committed
fix file filtering for SAST commit range; fix pre-receive availability for SCA
1 parent 3897d1d commit 62b4fae

File tree

5 files changed

+194
-192
lines changed

5 files changed

+194
-192
lines changed

cycode/cli/apps/scan/commit_range_scanner.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,19 @@
1818
)
1919
from cycode.cli.config import configuration_manager
2020
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
21-
from cycode.cli.files_collector.commit_range_documents import collect_commit_range_diff_documents
22-
from cycode.cli.files_collector.file_excluder import excluder
23-
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
24-
from cycode.cli.files_collector.repository_documents import (
21+
from cycode.cli.files_collector.commit_range_documents import (
22+
collect_commit_range_diff_documents,
2523
get_commit_range_modified_documents,
2624
get_diff_file_content,
2725
get_diff_file_path,
2826
get_pre_commit_modified_documents,
2927
parse_commit_range,
3028
)
29+
from cycode.cli.files_collector.file_excluder import excluder
30+
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
3131
from cycode.cli.files_collector.sca.sca_file_collector import (
32-
perform_pre_commit_range_scan_actions,
33-
perform_pre_hook_range_scan_actions,
32+
perform_sca_pre_commit_range_scan_actions,
33+
perform_sca_pre_hook_range_scan_actions,
3434
)
3535
from cycode.cli.files_collector.zip_documents import zip_documents
3636
from cycode.cli.models import Document
@@ -180,7 +180,7 @@ def _scan_sca_commit_range(ctx: typer.Context, path: str, commit_range: str, **_
180180
from_commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, from_commit_documents)
181181
to_commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, to_commit_documents)
182182

183-
perform_pre_commit_range_scan_actions(
183+
perform_sca_pre_commit_range_scan_actions(
184184
path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev
185185
)
186186

@@ -207,6 +207,8 @@ def _scan_sast_commit_range(ctx: typer.Context, path: str, commit_range: str, **
207207
_, commit_documents, diff_documents = get_commit_range_modified_documents(
208208
ctx.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES, path, from_commit_rev, to_commit_rev
209209
)
210+
commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, commit_documents)
211+
diff_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, diff_documents)
210212

211213
_scan_commit_range_documents(ctx, commit_documents, diff_documents, scan_parameters=scan_parameters)
212214

@@ -243,7 +245,7 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
243245
consts.SCA_SCAN_TYPE, pre_committed_documents
244246
)
245247

246-
perform_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents)
248+
perform_sca_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents)
247249

248250
_scan_commit_range_documents(
249251
ctx,
@@ -256,16 +258,18 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
256258

257259
def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None:
258260
progress_bar = ctx.obj['progress_bar']
259-
diff_files = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
261+
diff_index = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
260262

261-
progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files))
263+
progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_index))
262264

263265
documents_to_scan = []
264-
for file in diff_files:
266+
for diff in diff_index:
265267
progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES)
266-
documents_to_scan.append(Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file)))
267-
268+
documents_to_scan.append(
269+
Document(get_path_by_os(get_diff_file_path(diff)), get_diff_file_content(diff), is_git_diff_format=True)
270+
)
268271
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(consts.SECRET_SCAN_TYPE, documents_to_scan)
272+
269273
scan_documents(ctx, documents_to_scan, get_scan_parameters(ctx), is_git_diff=True)
270274

271275

cycode/cli/apps/scan/pre_receive/pre_receive_command.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33
from typing import Annotated, Optional
44

5-
import click
65
import typer
76

87
from cycode.cli import consts
@@ -14,7 +13,7 @@
1413
from cycode.cli.config import configuration_manager
1514
from cycode.cli.console import console
1615
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
17-
from cycode.cli.files_collector.repository_documents import (
16+
from cycode.cli.files_collector.commit_range_documents import (
1817
calculate_pre_receive_commit_range,
1918
parse_pre_receive_input,
2019
)
@@ -32,10 +31,6 @@ def pre_receive_command(
3231
try:
3332
add_breadcrumb('pre_receive')
3433

35-
scan_type = ctx.obj['scan_type']
36-
if scan_type != consts.SECRET_SCAN_TYPE:
37-
raise click.ClickException(f'Commit range scanning for {scan_type.upper()} is not supported')
38-
3934
if should_skip_pre_receive_scan():
4035
logger.info(
4136
'A scan has been skipped as per your request. '

cycode/cli/files_collector/commit_range_documents.py

Lines changed: 171 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
1-
from typing import Optional
1+
import os
2+
import sys
3+
from typing import TYPE_CHECKING, Optional
24

35
import typer
46

5-
from cycode.cli.files_collector.repository_documents import get_diff_file_content, get_diff_file_path
7+
from cycode.cli import consts
8+
from cycode.cli.files_collector.repository_documents import (
9+
get_file_content_from_commit_path,
10+
)
611
from cycode.cli.models import Document
712
from cycode.cli.utils.git_proxy import git_proxy
8-
from cycode.cli.utils.path_utils import get_path_by_os
13+
from cycode.cli.utils.path_utils import get_file_content, get_path_by_os
914
from cycode.cli.utils.progress_bar import ScanProgressBarSection
1015
from cycode.logger import get_logger
1116

17+
if TYPE_CHECKING:
18+
from git import Diff, Repo
19+
20+
from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection
21+
1222
logger = get_logger('Commit Range Collector')
1323

1424

@@ -67,3 +77,161 @@ def collect_commit_range_diff_documents(
6777
logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan})
6878

6979
return commit_documents_to_scan
80+
81+
82+
def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]:
    """Build the commit range to scan for one pre-receive branch update.

    :param branch_update_details: A single branch update line, ``<start_commit> <end_commit> <ref>``.
    :return: A range in the form ``<oldest not-yet-updated commit>~1...<end_commit>``, or ``None`` when
        the end commit equals ``consts.EMPTY_COMMIT_SHA`` or no not-yet-updated commit was found.
    """
    pushed_head = _get_end_commit_from_branch_update_details(branch_update_details)
    if pushed_head == consts.EMPTY_COMMIT_SHA:
        # branch is deleted, no need to perform scan
        return None

    oldest_new_commit = _get_oldest_unupdated_commit_for_branch(pushed_head)
    if oldest_new_commit:
        # "~1" moves the range start to the parent of the oldest new commit
        return f'{oldest_new_commit}~1...{pushed_head}'

    # no new commit to update found
    return None
96+
97+
98+
def _get_end_commit_from_branch_update_details(update_details: str) -> str:
99+
# update details pattern: <start_commit> <end_commit> <ref>
100+
_, end_commit, _ = update_details.split()
101+
return end_commit
102+
103+
104+
def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]:
    """Return the oldest commit leading to ``commit`` that is not in the remote repository yet.

    Runs ``git rev-list <commit> --topo-order --reverse --not --all`` against the repository located at
    the current working directory and takes the first line of its output.
    More info about rev-list command: https://git-scm.com/docs/git-rev-list

    :param commit: Revision whose not-yet-updated history is listed.
    :return: The first listed commit id, or ``None`` when the command prints nothing.
    """
    repo = git_proxy.get_repo(os.getcwd())
    rev_list_output = repo.git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all')

    new_commit_ids = rev_list_output.splitlines()
    return new_commit_ids[0] if new_commit_ids else None
115+
116+
117+
def _get_file_content_from_commit_diff(repo: 'Repo', commit: str, diff: 'Diff') -> Optional[str]:
    """Fetch the content of the file referenced by ``diff`` as stored in ``commit``.

    The file is addressed by its path relative to the repository root.

    :return: The result of ``get_file_content_from_commit_path``; callers in this module treat ``None``
        as "no content available".
    """
    return get_file_content_from_commit_path(repo, commit, get_diff_file_path(diff, relative=True))
120+
121+
122+
def get_commit_range_modified_documents(
    progress_bar: 'BaseProgressBar',
    progress_bar_section: 'ProgressBarSection',
    path: str,
    from_commit_rev: str,
    to_commit_rev: str,
) -> tuple[list[Document], list[Document], list[Document]]:
    """Collect documents for every non-deleted file that differs between two revisions.

    For each diff entry whose change type is not ``consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE`` this
    produces one patch document (git diff format) and, when content is available, one document with
    the file content at ``from_commit_rev`` and one with the content at ``to_commit_rev``.

    :param progress_bar: Progress bar that is advanced once per processed diff entry.
    :param progress_bar_section: Section of the progress bar to size and update.
    :param path: Path of the git repository.
    :param from_commit_rev: Revision the diff is taken from.
    :param to_commit_rev: Revision the diff is taken against.
    :return: ``(from_commit_documents, to_commit_documents, diff_documents)``.
    """
    repo = git_proxy.get_repo(path)
    full_diff = repo.commit(from_commit_rev).diff(to_commit_rev, create_patch=True, R=True)
    relevant_diffs = [
        entry for entry in full_diff if entry.change_type != consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE
    ]

    from_commit_documents = []
    to_commit_documents = []
    diff_documents = []
    # processed in this order for every entry: "from" revision first, then "to" revision
    documents_by_rev = (
        (from_commit_rev, from_commit_documents),
        (to_commit_rev, to_commit_documents),
    )

    progress_bar.set_section_length(progress_bar_section, len(relevant_diffs))
    for entry in relevant_diffs:
        progress_bar.update(progress_bar_section)

        file_path = get_path_by_os(get_diff_file_path(entry))

        patch_document = Document(
            path=file_path,
            content=get_diff_file_content(entry),
            is_git_diff_format=True,
        )
        diff_documents.append(patch_document)

        for rev, documents in documents_by_rev:
            file_content = _get_file_content_from_commit_diff(repo, rev, entry)
            if file_content is not None:
                documents.append(Document(file_path, file_content))

    return from_commit_documents, to_commit_documents, diff_documents
162+
163+
164+
def parse_pre_receive_input() -> str:
    """Parse input to pushed branch update details.

    Example input:
    old_value                                new_value                                refname
    -----------------------------------------------
    0000000000000000000000000000000000000000 9cf90954ef26e7c58284f8ebf7dcd0fcf711152a refs/heads/main
    973a96d3e925b65941f7c47fa16129f1577d499f 0000000000000000000000000000000000000000 refs/heads/feature-branch
    59564ef68745bca38c42fc57a7822efd519a6bd9 3378e52dcfa47fb11ce3a4a520bea5f85d5d0bf3 refs/heads/develop

    :return: First branch update details (input's first line)
    :raises ValueError: If stdin is empty or contains only whitespace.
    """
    # FIXME(MarshalX): this blocks main thread forever if called outside of pre-receive hook
    stdin_text = sys.stdin.read()
    branch_updates = stdin_text.strip()
    if branch_updates:
        # each line represents a branch update request, handle the first one only
        # TODO(MichalBor): support case of multiple update branch requests
        return branch_updates.splitlines()[0]

    raise ValueError(
        'Pre receive input was not found. Make sure that you are using this command only in pre-receive hook'
    )
186+
187+
188+
def get_diff_file_path(diff: 'Diff', relative: bool = False) -> Optional[str]:
    """Return the path of the file a diff entry refers to.

    The "b" side of the diff is preferred; the "a" side is used only when the "b" side is falsy.

    :param diff: Diff entry exposing ``a_path``/``b_path`` and ``a_blob``/``b_blob``.
    :param relative: When true, return the path relative to the repository root; otherwise return the
        blob's ``abspath``.
    """
    if relative:
        # relative to the repository root
        return diff.b_path or diff.a_path

    blob = diff.b_blob or diff.a_blob
    return blob.abspath
196+
197+
198+
def get_diff_file_content(diff: 'Diff') -> str:
    """Decode the raw patch carried by a diff entry as UTF-8 text.

    Bytes that are not valid UTF-8 are substituted with the replacement character instead of raising.
    """
    raw_patch = diff.diff
    return raw_patch.decode('UTF-8', errors='replace')
200+
201+
202+
def get_pre_commit_modified_documents(
    progress_bar: 'BaseProgressBar',
    progress_bar_section: 'ProgressBarSection',
    repo_path: str,
) -> tuple[list[Document], list[Document]]:
    """Collect HEAD and on-disk versions of the files that differ between the index and HEAD.

    :param progress_bar: Progress bar that is advanced once per diff entry.
    :param progress_bar_section: Section of the progress bar to size and update.
    :param repo_path: Path of the git repository.
    :return: ``(git_head_documents, pre_committed_documents)``. The first list holds file content read
        from ``consts.GIT_HEAD_COMMIT_REV`` (when available); the second holds content read from the
        file system for paths that currently exist.
    """
    repo = git_proxy.get_repo(repo_path)
    index_diffs = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
    progress_bar.set_section_length(progress_bar_section, len(index_diffs))

    git_head_documents = []
    pre_committed_documents = []
    for index_diff in index_diffs:
        progress_bar.update(progress_bar_section)

        file_path = get_path_by_os(get_diff_file_path(index_diff))

        head_content = _get_file_content_from_commit_diff(repo, consts.GIT_HEAD_COMMIT_REV, index_diff)
        if head_content is not None:
            git_head_documents.append(Document(file_path, head_content))

        # files missing on disk contribute no pre-committed document
        if not os.path.exists(file_path):
            continue
        pre_committed_documents.append(Document(file_path, get_file_content(file_path)))

    return git_head_documents, pre_committed_documents
226+
227+
228+
def parse_commit_range(commit_range: str, path: str) -> tuple[str, str]:
    """Resolve a commit range into the hashes of its last and first iterated commits.

    :param commit_range: Revision range passed to ``iter_commits`` as ``rev``.
    :param path: Path of the git repository.
    :return: ``(from_commit_rev, to_commit_rev)`` where ``to_commit_rev`` is the hash of the first commit
        yielded by ``iter_commits`` and ``from_commit_rev`` is the hash of the last one. Both values
        are ``None`` when the range yields no commits.
    """
    first_seen_sha = None
    last_seen_sha = None

    repo = git_proxy.get_repo(path)
    for commit in repo.iter_commits(rev=commit_range):
        sha = commit.hexsha
        if not first_seen_sha:
            first_seen_sha = sha
        last_seen_sha = sha

    return last_seen_sha, first_seen_sha

0 commit comments

Comments
 (0)