From 91a73980e48adc0735b5a10330ec6fda4839e455 Mon Sep 17 00:00:00 2001
From: gotbadger
Date: Fri, 21 Nov 2025 09:38:25 +0000
Subject: [PATCH] CM-55207: improve testing and cover some edge cases in commit
pre-receive
---
.../scan/pre_receive/pre_receive_command.py | 4 +-
.../files_collector/commit_range_documents.py | 34 +++-
.../test_commit_range_documents.py | 176 ++++++++++++++++++
3 files changed, 206 insertions(+), 8 deletions(-)
diff --git a/cycode/cli/apps/scan/pre_receive/pre_receive_command.py b/cycode/cli/apps/scan/pre_receive/pre_receive_command.py
index 3b85dc9e..f6265fd2 100644
--- a/cycode/cli/apps/scan/pre_receive/pre_receive_command.py
+++ b/cycode/cli/apps/scan/pre_receive/pre_receive_command.py
@@ -47,7 +47,9 @@ def pre_receive_command(
timeout = configuration_manager.get_pre_receive_command_timeout(command_scan_type)
with TimeoutAfter(timeout):
branch_update_details = parse_pre_receive_input()
- commit_range = calculate_pre_receive_commit_range(branch_update_details)
+ commit_range = calculate_pre_receive_commit_range(
+ repo_path=os.getcwd(), branch_update_details=branch_update_details
+ )
if not commit_range:
logger.info(
'No new commits found for pushed branch, %s',
diff --git a/cycode/cli/files_collector/commit_range_documents.py b/cycode/cli/files_collector/commit_range_documents.py
index 8f8eccac..2ca54dd5 100644
--- a/cycode/cli/files_collector/commit_range_documents.py
+++ b/cycode/cli/files_collector/commit_range_documents.py
@@ -104,19 +104,27 @@ def collect_commit_range_diff_documents(
return commit_documents_to_scan
-def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]:
+def calculate_pre_receive_commit_range(repo_path: str, branch_update_details: str) -> Optional[str]:
end_commit = _get_end_commit_from_branch_update_details(branch_update_details)
# branch is deleted, no need to perform scan
if end_commit == consts.EMPTY_COMMIT_SHA:
return None
- start_commit = _get_oldest_unupdated_commit_for_branch(end_commit)
+ repo = git_proxy.get_repo(repo_path)
+ start_commit = _get_oldest_unupdated_commit_for_branch(repo, end_commit)
# no new commit to update found
if not start_commit:
return None
+ # If the oldest not-yet-updated commit has no parent (root commit or orphaned history),
+ # using '~1' will fail. In that case, scan from the end commit, which effectively
+ # includes the entire history reachable from it (which is exactly what we need here).
+
+ if not bool(repo.commit(start_commit).parents):
+ return f'{end_commit}'
+
return f'{start_commit}~1...{end_commit}'
@@ -126,10 +134,10 @@ def _get_end_commit_from_branch_update_details(update_details: str) -> str:
return end_commit
-def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]:
+def _get_oldest_unupdated_commit_for_branch(repo: 'Repo', commit: str) -> Optional[str]:
# get a list of commits by chronological order that are not in the remote repository yet
# more info about rev-list command: https://git-scm.com/docs/git-rev-list
- repo = git_proxy.get_repo(os.getcwd())
+
not_updated_commits = repo.git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all')
commits = not_updated_commits.splitlines()
@@ -199,8 +207,7 @@ def parse_pre_receive_input() -> str:
:return: First branch update details (input's first line)
"""
- # FIXME(MarshalX): this blocks main thread forever if called outside of pre-receive hook
- pre_receive_input = sys.stdin.read().strip()
+ pre_receive_input = _read_hook_input_from_stdin()
if not pre_receive_input:
raise ValueError(
'Pre receive input was not found. Make sure that you are using this command only in pre-receive hook'
@@ -222,7 +229,7 @@ def parse_pre_push_input() -> str:
:return: First, push update details (input's first line)
""" # noqa: E501
- pre_push_input = sys.stdin.read().strip()
+ pre_push_input = _read_hook_input_from_stdin()
if not pre_push_input:
raise ValueError(
'Pre push input was not found. Make sure that you are using this command only in pre-push hook'
@@ -232,6 +239,19 @@ def parse_pre_push_input() -> str:
return pre_push_input.splitlines()[0]
+def _read_hook_input_from_stdin() -> str:
+ """Read input from stdin when called from a hook.
+
+ If called manually from the command line, return an empty string so it doesn't block the main thread.
+
+ Returns:
+ Input from stdin
+ """
+ if sys.stdin.isatty():
+ return ''
+ return sys.stdin.read().strip()
+
+
def _get_default_branches_for_merge_base(repo: 'Repo') -> list[str]:
"""Get a list of default branches to try for merge base calculation.
diff --git a/tests/cli/files_collector/test_commit_range_documents.py b/tests/cli/files_collector/test_commit_range_documents.py
index cd30d1eb..d27d24a3 100644
--- a/tests/cli/files_collector/test_commit_range_documents.py
+++ b/tests/cli/files_collector/test_commit_range_documents.py
@@ -12,13 +12,22 @@
from cycode.cli.files_collector.commit_range_documents import (
_get_default_branches_for_merge_base,
calculate_pre_push_commit_range,
+ calculate_pre_receive_commit_range,
get_diff_file_path,
get_safe_head_reference_for_diff,
parse_commit_range,
parse_pre_push_input,
+ parse_pre_receive_input,
)
from cycode.cli.utils.path_utils import get_path_by_os
+DUMMY_SHA_0 = '0' * 40
+DUMMY_SHA_1 = '1' * 40
+DUMMY_SHA_2 = '2' * 40
+DUMMY_SHA_A = 'a' * 40
+DUMMY_SHA_B = 'b' * 40
+DUMMY_SHA_C = 'c' * 40
+
@contextmanager
def git_repository(path: str) -> Generator[Repo, None, None]:
@@ -871,3 +880,170 @@ def test_single_commit_spec(self) -> None:
parsed_from, parsed_to = parse_commit_range(a, temp_dir)
assert (parsed_from, parsed_to) == (a, c)
+
+
+class TestParsePreReceiveInput:
+ """Test the parse_pre_receive_input function with various pre-receive hook input scenarios."""
+
+ def test_parse_single_update_input(self) -> None:
+ """Test parsing a single branch update input."""
+ pre_receive_input = f'{DUMMY_SHA_1} {DUMMY_SHA_2} refs/heads/main'
+
+ with patch('sys.stdin', StringIO(pre_receive_input)):
+ result = parse_pre_receive_input()
+ assert result == pre_receive_input
+
+ def test_parse_multiple_update_input_returns_first_line(self) -> None:
+ """Test parsing multiple branch updates returns only the first line."""
+ pre_receive_input = f"""{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main
+{DUMMY_SHA_B} {DUMMY_SHA_C} refs/heads/feature"""
+
+ with patch('sys.stdin', StringIO(pre_receive_input)):
+ result = parse_pre_receive_input()
+ assert result == f'{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main'
+
+ def test_parse_empty_input_raises_error(self) -> None:
+ """Test that empty input raises ValueError."""
+ match = 'Pre receive input was not found'
+ with patch('sys.stdin', StringIO('')), pytest.raises(ValueError, match=match):
+ parse_pre_receive_input()
+
+
+class TestCalculatePreReceiveCommitRange:
+ """Test the calculate_pre_receive_commit_range function with representative scenarios."""
+
+ def test_branch_deletion_returns_none(self) -> None:
+ """When end commit is all zeros (deletion), no scan is needed."""
+ update_details = f'{DUMMY_SHA_A} {consts.EMPTY_COMMIT_SHA} refs/heads/feature'
+ assert calculate_pre_receive_commit_range(os.getcwd(), update_details) is None
+
+ def test_no_new_commits_returns_none(self) -> None:
+ """When there are no commits not in remote, return None."""
+ with tempfile.TemporaryDirectory() as server_dir:
+ server_repo = Repo.init(server_dir, bare=True)
+ try:
+ with tempfile.TemporaryDirectory() as work_dir:
+ work_repo = Repo.init(work_dir, b='main')
+ try:
+ # Create a single commit and push it to the server as main (end commit is already on a ref)
+ test_file = os.path.join(work_dir, 'file.txt')
+ with open(test_file, 'w') as f:
+ f.write('base')
+ work_repo.index.add(['file.txt'])
+ end_commit = work_repo.index.commit('initial')
+
+ work_repo.create_remote('origin', server_dir)
+ work_repo.remotes.origin.push('main:main')
+
+ update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
+ assert calculate_pre_receive_commit_range(server_dir, update_details) is None
+ finally:
+ work_repo.close()
+ finally:
+ server_repo.close()
+
+ def test_returns_triple_dot_range_from_oldest_unupdated(self) -> None:
+ """Returns '~1...' when there are new commits to scan."""
+ with tempfile.TemporaryDirectory() as server_dir:
+ server_repo = Repo.init(server_dir, bare=True)
+ try:
+ with tempfile.TemporaryDirectory() as work_dir:
+ work_repo = Repo.init(work_dir, b='main')
+ try:
+ # Create commit A and push it to server as main (server has A on a ref)
+ a_path = os.path.join(work_dir, 'a.txt')
+ with open(a_path, 'w') as f:
+ f.write('A')
+ work_repo.index.add(['a.txt'])
+ work_repo.index.commit('A')
+
+ work_repo.create_remote('origin', server_dir)
+ work_repo.remotes.origin.push('main:main')
+
+ # Create commits B and C locally (not yet on server ref)
+ b_path = os.path.join(work_dir, 'b.txt')
+ with open(b_path, 'w') as f:
+ f.write('B')
+ work_repo.index.add(['b.txt'])
+ b_commit = work_repo.index.commit('B')
+
+ c_path = os.path.join(work_dir, 'c.txt')
+ with open(c_path, 'w') as f:
+ f.write('C')
+ work_repo.index.add(['c.txt'])
+ end_commit = work_repo.index.commit('C')
+
+ # Push the objects to a temporary ref and then delete that ref on server,
+ # so the objects exist but are not reachable from any ref.
+ work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold')
+ Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold')
+
+ update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
+ result = calculate_pre_receive_commit_range(server_dir, update_details)
+ assert result == f'{b_commit.hexsha}~1...{end_commit.hexsha}'
+ finally:
+ work_repo.close()
+ finally:
+ server_repo.close()
+
+ def test_initial_oldest_commit_without_parent_returns_single_commit_range(self) -> None:
+ """If oldest commit has no parent, avoid '~1' and scan from end commit only."""
+ with tempfile.TemporaryDirectory() as server_dir:
+ server_repo = Repo.init(server_dir, bare=True)
+ try:
+ with tempfile.TemporaryDirectory() as work_dir:
+ work_repo = Repo.init(work_dir, b='main')
+ try:
+ # Create a single root commit locally
+ p = os.path.join(work_dir, 'root.txt')
+ with open(p, 'w') as f:
+ f.write('root')
+ work_repo.index.add(['root.txt'])
+ end_commit = work_repo.index.commit('root')
+
+ work_repo.create_remote('origin', server_dir)
+ # Push objects to a temporary ref and delete it so server has objects but no refs
+ work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold')
+ Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold')
+
+ update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
+ result = calculate_pre_receive_commit_range(server_dir, update_details)
+ assert result == end_commit.hexsha
+ finally:
+ work_repo.close()
+ finally:
+ server_repo.close()
+
+ def test_initial_oldest_commit_without_parent_with_two_commits_returns_single_commit_range(self) -> None:
+ """If there are two new commits and the oldest has no parent, avoid '~1' and scan from end commit only."""
+ with tempfile.TemporaryDirectory() as server_dir:
+ server_repo = Repo.init(server_dir, bare=True)
+ try:
+ with tempfile.TemporaryDirectory() as work_dir:
+ work_repo = Repo.init(work_dir, b='main')
+ try:
+ # Create two commits locally: oldest has no parent, second on top
+ a_path = os.path.join(work_dir, 'a.txt')
+ with open(a_path, 'w') as f:
+ f.write('A')
+ work_repo.index.add(['a.txt'])
+ work_repo.index.commit('A')
+
+ d_path = os.path.join(work_dir, 'd.txt')
+ with open(d_path, 'w') as f:
+ f.write('D')
+ work_repo.index.add(['d.txt'])
+ end_commit = work_repo.index.commit('D')
+
+ work_repo.create_remote('origin', server_dir)
+ # Push objects to a temporary ref and delete it so server has objects but no refs
+ work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold')
+ Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold')
+
+ update_details = f'{consts.EMPTY_COMMIT_SHA} {end_commit.hexsha} refs/heads/main'
+ result = calculate_pre_receive_commit_range(server_dir, update_details)
+ assert result == end_commit.hexsha
+ finally:
+ work_repo.close()
+ finally:
+ server_repo.close()