|
12 | 12 | # language governing permissions and limitations under the License.
|
13 | 13 | from __future__ import absolute_import
|
14 | 14 |
|
15 |
| -import pytest |
16 | 15 | import os
|
17 |
| -from pathlib import Path |
18 | 16 | import subprocess
|
19 |
| -from mock import patch, ANY |
| 17 | +from pathlib import Path |
| 18 | + |
| 19 | +import pytest |
| 20 | +from mock import ANY, patch |
20 | 21 |
|
21 | 22 | from sagemaker import git_utils
|
22 | 23 |
|
@@ -494,3 +495,212 @@ def test_git_clone_repo_codecommit_https_creds_not_stored_locally(tempdir, mkdte
|
494 | 495 | with pytest.raises(subprocess.CalledProcessError) as error:
|
495 | 496 | git_utils.git_clone_repo(git_config, entry_point)
|
496 | 497 | assert "returned non-zero exit status" in str(error.value)
|
| 498 | + |
| 499 | + |
class TestGitUrlSanitization:
    """Exercise the Git URL sanitization that guards against injection attacks.

    Covers ``git_utils._sanitize_git_url`` directly and the same rejections as
    surfaced through ``git_utils.git_clone_repo``.

    NOTE(review): several URL literals below contain the text
    ``[email protected]``. That looks like e-mail-obfuscation residue from
    wherever this text was captured rather than an intended test input --
    confirm against the upstream file before relying on those cases.
    """

    @staticmethod
    def _rejection_message(call, *args):
        """Run ``call(*args)``, require a ``ValueError``, and return its message text."""
        with pytest.raises(ValueError) as error:
            call(*args)
        return str(error.value)

    def test_sanitize_git_url_valid_https_urls(self):
        """Accepted HTTP(S) URLs are returned unchanged by the sanitizer."""
        accepted = (
            "https://github.com/user/repo.git",
            "https://gitlab.com/user/repo.git",
            "https://[email protected]/user/repo.git",
            "https://user:[email protected]/user/repo.git",
            "http://internal-git.company.com/repo.git",
        )

        for candidate in accepted:
            # An exception raised here fails the test; the value must round-trip.
            assert git_utils._sanitize_git_url(candidate) == candidate

    def test_sanitize_git_url_valid_ssh_urls(self):
        """Accepted SSH-style URLs are returned unchanged by the sanitizer."""
        accepted = (
            "[email protected]:user/repo.git",
            "[email protected]:user/repo.git",
            "ssh://[email protected]/user/repo.git",
            # No @ symbol at all, which is acceptable for the ssh:// form.
            "ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/",
        )

        for candidate in accepted:
            # An exception raised here fails the test; the value must round-trip.
            assert git_utils._sanitize_git_url(candidate) == candidate

    def test_sanitize_git_url_blocks_multiple_at_https(self):
        """HTTPS URLs carrying more than one @ symbol are rejected."""
        rejected = (
            "https://[email protected]@github.com/repo.git",
            "https://[email protected]@gitlab.com/user/repo.git",
            "https://a@b@[email protected]/repo.git",
            "https://user@[email protected]/legit/repo.git",
        )

        for candidate in rejected:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert "multiple @ symbols detected" in message

    def test_sanitize_git_url_blocks_multiple_at_ssh(self):
        """SSH URLs carrying more than one @ symbol are rejected."""
        rejected = (
            "[email protected]@github.com:repo.git",
            "git@[email protected]:user/repo.git",
            "ssh://git@[email protected]/repo.git",
            "git@a@b@c:repo.git",
        )
        # git@ URLs are expected to report "exactly one @ symbol" and ssh://
        # URLs "multiple @ symbols detected"; either wording is accepted here.
        tolerated = ("multiple @ symbols detected", "exactly one @ symbol")

        for candidate in rejected:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert any(phrase in message for phrase in tolerated)

    def test_sanitize_git_url_blocks_invalid_schemes_and_git_at_format(self):
        """Unknown prefixes and malformed git@ URLs are both rejected."""
        # Starts with none of git@, ssh://, http://, https://.
        unknown_prefix = ("git-github.com:user/repo.git",)
        for candidate in unknown_prefix:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert "Unsupported URL scheme" in message

        # git@ form with the wrong number of @ symbols (two).
        wrong_at_count = ("[email protected]@evil.com:repo.git",)
        for candidate in wrong_at_count:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert "exactly one @ symbol" in message

    def test_sanitize_git_url_blocks_url_encoding_obfuscation(self):
        """Percent-encoded attempts to disguise the host are rejected."""
        rejected = (
            "https://github.com%25evil.com/repo.git",
            "https://[email protected]%40attacker.com/repo.git",
            "https://github.com%2Fevil.com/repo.git",
            "https://github.com%3Aevil.com/repo.git",
        )
        # Either the encoding check or the hostname check may fire first.
        tolerated = ("Suspicious URL encoding detected", "Invalid characters in hostname")

        for candidate in rejected:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert any(phrase in message for phrase in tolerated)

    def test_sanitize_git_url_blocks_invalid_hostname_chars(self):
        """Hostnames containing disallowed characters are rejected."""
        rejected = (
            "https://github<script>.com/repo.git",
            "https://github>.com/repo.git",
            "https://github[].com/repo.git",
            "https://github{}.com/repo.git",
        )
        # URL-parsing edge cases mean the failure can surface with any of these texts.
        tolerated = (
            "Invalid characters in hostname",
            "Failed to parse URL",
            "does not appear to be an IPv4 or IPv6 address",
        )

        for candidate in rejected:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert any(phrase in message for phrase in tolerated)

    def test_sanitize_git_url_blocks_unsupported_schemes(self):
        """URLs whose scheme is not supported are rejected."""
        rejected = (
            "ftp://github.com/repo.git",
            "file:///local/repo.git",
            "javascript:alert('xss')",
            "data:text/html,<script>alert('xss')</script>",
        )

        for candidate in rejected:
            message = self._rejection_message(git_utils._sanitize_git_url, candidate)
            assert "Unsupported URL scheme" in message

    def test_git_clone_repo_blocks_malicious_https_url(self):
        """``git_clone_repo`` refuses an HTTPS URL with multiple @ symbols."""
        entry_point = "train.py"
        malicious_git_config = {
            "repo": "https://[email protected]@github.com/legit/repo.git",
            "branch": "main",
        }

        message = self._rejection_message(
            git_utils.git_clone_repo, malicious_git_config, entry_point
        )
        assert "multiple @ symbols detected" in message

    def test_git_clone_repo_blocks_malicious_ssh_url(self):
        """``git_clone_repo`` refuses a git@ URL with the wrong number of @ symbols."""
        entry_point = "train.py"
        malicious_git_config = {
            "repo": "git@[email protected]:sage-maker/temp-sev2.git",
            "branch": "main",
        }

        message = self._rejection_message(
            git_utils.git_clone_repo, malicious_git_config, entry_point
        )
        assert "exactly one @ symbol" in message

    def test_git_clone_repo_blocks_url_encoded_attack(self):
        """``git_clone_repo`` refuses a URL that hides an @ behind percent-encoding."""
        entry_point = "train.py"
        malicious_git_config = {
            "repo": "https://github.com%40attacker.com/repo.git",
            "branch": "main",
        }

        message = self._rejection_message(
            git_utils.git_clone_repo, malicious_git_config, entry_point
        )
        assert "Suspicious URL encoding detected" in message

    def test_sanitize_git_url_comprehensive_attack_scenarios(self):
        """A mixed set of attack URLs is rejected when passed to ``git_clone_repo``."""
        attack_scenarios = (
            # Original PoC attack
            "https://USER@YOUR_NGROK_OR_LOCALHOST/[email protected]%25legit%25repo.git",
            # Variations of the attack
            "https://user@[email protected]/legit/repo.git",
            "[email protected]@github.com:user/repo.git",
            "ssh://[email protected]@github.com/repo.git",
            # URL encoding variations
            "https://github.com%40evil.com/repo.git",
            "https://[email protected]%2Fevil.com/repo.git",
        )
        # Any one of the sanitizer's rejection messages counts as a block.
        tolerated = (
            "multiple @ symbols detected",
            "exactly one @ symbol",
            "Suspicious URL encoding detected",
            "Invalid characters in hostname",
        )
        entry_point = "train.py"

        for malicious_url in attack_scenarios:
            message = self._rejection_message(
                git_utils.git_clone_repo, {"repo": malicious_url}, entry_point
            )
            assert any(phrase in message for phrase in tolerated)
0 commit comments