Skip to content

Commit

Permalink
fix: limit the size of the git clone (#1111)
Browse files Browse the repository at this point in the history
* fix: limit the size of the git clone

fix: set a max repo size while cloning

Signed-off-by: Harikrishnan Balagopal <[email protected]>

* chore: update go version to be able to use atomic counters atomic.Int64

Signed-off-by: Harikrishnan Balagopal <[email protected]>

* fixup! chore: update go version to be able to use atomic counters atomic.Int64

Signed-off-by: Harikrishnan Balagopal <[email protected]>

---------

Signed-off-by: Harikrishnan Balagopal <[email protected]>
  • Loading branch information
HarikrishnanBalagopal authored Dec 2, 2023
1 parent af4d50b commit e88cece
Show file tree
Hide file tree
Showing 13 changed files with 281 additions and 149 deletions.
1 change: 1 addition & 0 deletions cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const (
qadisablecliFlag = "qa-disable-cli"
qaportFlag = "qa-port"
planProgressPortFlag = "plan-progress-port"
maxCloneSizeBytesFlag = "max-clone-size"
transformerSelectorFlag = "transformer-selector"
qaEnabledCategoriesFlag = "qa-enable"
qaDisabledCategoriesFlag = "qa-disable"
Expand Down
4 changes: 4 additions & 0 deletions cmd/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
)

type planFlags struct {
maxVCSRepoCloneSize int64
progressServerPort int
planfile string
srcpath string
Expand Down Expand Up @@ -65,6 +66,8 @@ func planHandler(cmd *cobra.Command, flags planFlags) {
}()
defer lib.Destroy()

vcs.SetMaxRepoCloneSize(flags.maxVCSRepoCloneSize)

var err error
planfile := flags.planfile
srcpath := flags.srcpath
Expand Down Expand Up @@ -182,6 +185,7 @@ func GetPlanCommand() *cobra.Command {
planCmd.Flags().StringSliceVar(&flags.preSets, preSetFlag, []string{}, "Specify preset config to use.")
planCmd.Flags().StringArrayVar(&flags.setconfigs, setConfigFlag, []string{}, "Specify config key-value pairs.")
planCmd.Flags().IntVar(&flags.progressServerPort, planProgressPortFlag, 0, "Port for the plan progress server. If not provided, the server won't be started.")
planCmd.Flags().Int64Var(&flags.maxVCSRepoCloneSize, maxCloneSizeBytesFlag, -1, "Max size in bytes when cloning a git repo. Default -1 is infinite")
planCmd.Flags().BoolVar(&flags.disableLocalExecution, common.DisableLocalExecutionFlag, false, "Allow files to be executed locally.")
planCmd.Flags().BoolVar(&flags.failOnEmptyPlan, common.FailOnEmptyPlan, false, "If true, planning will exit with a failure exit code if no services are detected (and no default transformers are found).")

Expand Down
13 changes: 12 additions & 1 deletion cmd/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import (

type transformFlags struct {
qaflags
// maxVCSRepoCloneSize is the maximum size in bytes for cloning repos
maxVCSRepoCloneSize int64
// ignoreEnv tells us whether to use data collected from the local machine
ignoreEnv bool
// disableLocalExecution disables execution of executables locally
Expand Down Expand Up @@ -72,6 +74,7 @@ func transformHandler(cmd *cobra.Command, flags transformFlags) {
}
defer pprof.StopCPUProfile()
}
vcs.SetMaxRepoCloneSize(flags.maxVCSRepoCloneSize)

ctx, cancel := context.WithCancel(cmd.Context())
logrus.AddHook(common.NewCleanupHook(cancel))
Expand Down Expand Up @@ -250,7 +253,14 @@ func transformHandler(cmd *cobra.Command, flags transformFlags) {
}
startQA(flags.qaflags)
}
if err := lib.Transform(ctx, transformationPlan, preExistingPlan, flags.outpath, flags.transformerSelector, flags.maxIterations); err != nil {
if err := lib.Transform(
ctx,
transformationPlan,
preExistingPlan,
flags.outpath,
flags.transformerSelector,
flags.maxIterations,
); err != nil {
logrus.Fatalf("failed to transform. Error: %q", err)
}
logrus.Infof("Transformed target artifacts can be found at [%s].", flags.outpath)
Expand Down Expand Up @@ -290,6 +300,7 @@ func GetTransformCommand() *cobra.Command {
transformCmd.Flags().StringVarP(&flags.customizationsPath, customizationsFlag, "c", "", "Specify directory or a git url (see https://move2kube.konveyor.io/concepts/git-support) where customizations are stored. By default we look for "+common.DefaultCustomizationDir)
transformCmd.Flags().StringVarP(&flags.transformerSelector, transformerSelectorFlag, "t", "", "Specify the transformer selector.")
transformCmd.Flags().BoolVar(&flags.qaskip, qaSkipFlag, false, "Enable/disable the default answers to questions posed in QA Cli sub-system. If disabled, you will have to answer the questions posed by QA during interaction.")
transformCmd.Flags().Int64Var(&flags.maxVCSRepoCloneSize, maxCloneSizeBytesFlag, -1, "Max size in bytes when cloning a git repo. Default -1 is infinite")

// QA options
transformCmd.Flags().StringSliceVar(&flags.qaEnabledCategories, qaEnabledCategoriesFlag, []string{}, "Specify the QA categories to enable (cannot be used in conjunction with qa-disable)")
Expand Down
127 changes: 71 additions & 56 deletions common/vcs/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ import (
"strings"
"time"

"github.com/go-git/go-billy/v5"
"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/go-git/go-git/v5/plumbing/transport/http"
"github.com/go-git/go-git/v5/plumbing/transport/ssh"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/konveyor/move2kube/common"
"github.com/konveyor/move2kube/qaengine"
"github.com/sirupsen/logrus"
Expand All @@ -47,6 +51,11 @@ type GitVCSRepo struct {
GitRepoPath string
}

// gitVCSRegex matches the git repo urls that are recognised as a git VCS url:
// the "git+https://" or "git+ssh://" scheme, then a dotted host name,
// optionally followed by a ":port" and a "/path".
// It is compiled once at package scope so that callers do not recompile it on every check.
var gitVCSRegex = regexp.MustCompile(`^git\+(https|ssh)://[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}(:[0-9]{1,5})?(\/.*)?$`)

func isGitCommitHash(commithash string) bool {
gitCommitHashRegex := regexp.MustCompile(`^[a-fA-F0-9]{40}$`)
return gitCommitHashRegex.MatchString(commithash)
Expand Down Expand Up @@ -112,26 +121,23 @@ func getGitRepoStruct(vcsurl string) (*GitVCSRepo, error) {

}

// isGitVCS checks if the given vcs url is git
// isGitVCS checks if the given vcs url is a git repo url
func isGitVCS(vcsurl string) bool {
// for https or ssh
gitVCSRegex := `^git\+(https|ssh)://[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}(:[0-9]{1,5})?(\/.*)?$`
matched, err := regexp.MatchString(gitVCSRegex, vcsurl)
if err != nil {
logrus.Fatalf("failed to match the given vcsurl %v with the git vcs regex expression %v. Error : %v", vcsurl, gitVCSRegex, err)
}
return matched
return gitVCSRegex.MatchString(vcsurl)
}

func pushGitVCS(remotePath, folderName string) error {
func pushGitVCS(remotePath, folderName string, maxSize int64) error {
if !common.IgnoreEnvironment {
logrus.Warnf("push to remote git repositories using credentials from the environment is not yet supported.")
}
remotePathSplitByAt := strings.Split(remotePath, "@")
remotePathSplitByColon := strings.Split(remotePathSplitByAt[0], ":")
isSSH := strings.HasPrefix(remotePath, "git+ssh")
isHTTPS := strings.HasPrefix(remotePath, "git+https")
gitFSPath := GetClonedPath(remotePath, folderName, false)
gitFSPath, err := GetClonedPath(remotePath, folderName, false)
if err != nil {
return fmt.Errorf("failed to clone the repo. Error: %w", err)
}
if (isHTTPS && len(remotePathSplitByColon) > 2) || (isSSH && len(remotePathSplitByColon) > 2) {
gitFSPath = strings.TrimSuffix(gitFSPath, remotePathSplitByColon[len(remotePathSplitByColon)-1])
}
Expand Down Expand Up @@ -202,60 +208,73 @@ func pushGitVCS(remotePath, folderName string) error {
return nil
}

// Clone Clones a git repository with the given commit depth and path where to be cloned and returns final path
func (gvcsrepo *GitVCSRepo) Clone(gitCloneOptions VCSCloneOptions) (string, error) {

if gitCloneOptions.CloneDestinationPath == "" {
return "", fmt.Errorf("the path where the repository has to be clone is empty - %s", gitCloneOptions.CloneDestinationPath)
// Clone clones a git repository with the given commit depth
// and path where it is to be cloned and returns the final path inside the repo
func (gvcsrepo *GitVCSRepo) Clone(cloneOptions VCSCloneOptions) (string, error) {
if cloneOptions.CloneDestinationPath == "" {
return "", fmt.Errorf("the path where the repository has to be cloned cannot be empty")
}
repoPath := filepath.Join(gitCloneOptions.CloneDestinationPath, gvcsrepo.GitRepoPath)
_, err := os.Stat(repoPath)
if os.IsNotExist(err) {
logrus.Debugf("cloned output would be available at '%s'", repoPath)
} else if gitCloneOptions.Overwrite {
logrus.Infof("git repository might get overwritten at %s", repoPath)
err = os.RemoveAll(repoPath)
if err != nil {
return "", fmt.Errorf("failed to remove the directory at the given path - %s", repoPath)
repoPath := filepath.Join(cloneOptions.CloneDestinationPath, gvcsrepo.GitRepoPath)
repoDirInfo, err := os.Stat(repoPath)
if err != nil {
if !os.IsNotExist(err) {
return "", fmt.Errorf("failed to stat the git repo clone destination path '%s'. error: %w", repoPath, err)
}
logrus.Debugf("the cloned git repo will be available at '%s'", repoPath)
} else {
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
if !cloneOptions.Overwrite {
if !repoDirInfo.IsDir() {
return "", fmt.Errorf("a file already exists at the git repo clone destination path '%s'", repoPath)
}
logrus.Infof("Assuming that the directory at '%s' is the cloned repo", repoPath)
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
}
logrus.Infof("git repository clone will overwrite the files/directories at '%s'", repoPath)
if err := os.RemoveAll(repoPath); err != nil {
return "", fmt.Errorf("failed to remove the files/directories at '%s' . error: %w", repoPath, err)
}
}
logrus.Infof("Cloning the repository using git into '%s' . This might take some time.", cloneOptions.CloneDestinationPath)

// ------------
var repoDirWt, dotGitDir billy.Filesystem
repoDirWt = osfs.New(repoPath)
dotGitDir, _ = repoDirWt.Chroot(git.GitDirName)
fStorer := filesystem.NewStorage(dotGitDir, cache.NewObjectLRUDefault())
limitStorer := Limit(fStorer, cloneOptions.MaxSize)
// ------------

commitDepth := 1
if cloneOptions.CommitDepth != 0 {
commitDepth = cloneOptions.CommitDepth
}
logrus.Infof("Cloning the repository using git into %s. This might take some time.", gitCloneOptions.CloneDestinationPath)
if gvcsrepo.Branch != "" {
commitDepth := 1
if gitCloneOptions.CommitDepth != 0 {
commitDepth = gitCloneOptions.CommitDepth
}
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
Depth: commitDepth,
SingleBranch: true,
ReferenceName: plumbing.ReferenceName(fmt.Sprintf("refs/heads/%s", gvcsrepo.Branch)),
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
logrus.Debugf("provided branch %+v does not exist in the remote, therefore creating one.", gvcsrepo.Branch)
logrus.Debugf("failed to clone the given branch '%s' . Will clone the entire repo and try again.", gvcsrepo.Branch)
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
Depth: commitDepth,
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options. Error : %+v", err)
return "", fmt.Errorf("failed to perform clone operation using git. Error: %w", err)
}
branch := fmt.Sprintf("refs/heads/%s", gvcsrepo.Branch)
b := plumbing.ReferenceName(branch)
w, err := gvcsrepo.GitRepository.Worktree()
if err != nil {
return "", fmt.Errorf("failed return a worktree for the repostiory. Error : %+v", err)
return "", fmt.Errorf("failed return a worktree for the repostiory. Error: %w", err)
}

err = w.Checkout(&git.CheckoutOptions{Create: false, Force: false, Branch: b})

if err != nil {
err := w.Checkout(&git.CheckoutOptions{Create: true, Force: false, Branch: b})
if err != nil {
if err := w.Checkout(&git.CheckoutOptions{Create: false, Force: false, Branch: b}); err != nil {
logrus.Debugf("failed to checkout the branch '%s', creating it...", b)
if err := w.Checkout(&git.CheckoutOptions{Create: true, Force: false, Branch: b}); err != nil {
return "", fmt.Errorf("failed checkout a new branch. Error : %+v", err)
}
}
Expand All @@ -265,45 +284,41 @@ func (gvcsrepo *GitVCSRepo) Clone(gitCloneOptions VCSCloneOptions) (string, erro
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error: %w", cloneOpts, err)
}
r, err := git.PlainOpen(repoPath)
if err != nil {
return "", fmt.Errorf("failed to open the git repository at the given path %+v. Error : %+v", repoPath, err)
return "", fmt.Errorf("failed to open the git repository at the given path '%s' . Error: %w", repoPath, err)
}
w, err := r.Worktree()
if err != nil {
return "", fmt.Errorf("failed return a worktree for the repostiory %+v. Error : %+v", r, err)
return "", fmt.Errorf("failed return a worktree for the repostiory %+v. Error: %w", r, err)
}
checkoutOpts := git.CheckoutOptions{
Hash: commitHash,
}
err = w.Checkout(&checkoutOpts)
if err != nil {
return "", fmt.Errorf("failed to checkout commit hash : %s on work tree. Error : %+v", commitHash, w)
checkoutOpts := git.CheckoutOptions{Hash: commitHash}
if err := w.Checkout(&checkoutOpts); err != nil {
return "", fmt.Errorf("failed to checkout commit hash '%s' on work tree. Error: %w", commitHash, err)
}
} else if gvcsrepo.Tag != "" {
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
ReferenceName: plumbing.ReferenceName(fmt.Sprintf("refs/tags/%s", gvcsrepo.Tag)),
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error: %w", cloneOpts, err)
}
} else {
commitDepth := 1
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
Depth: commitDepth,
SingleBranch: true,
ReferenceName: "refs/heads/main",
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
return "", fmt.Errorf("failed to perform clone operation using git with options %+v and %+v. Error: %w", cloneOpts, cloneOptions, err)
}
}
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
Expand Down
39 changes: 20 additions & 19 deletions common/vcs/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/konveyor/move2kube/common"
)

func TestIsGitCommitHash(t *testing.T) {
Expand Down Expand Up @@ -125,45 +124,47 @@ func TestIsGitVCS(t *testing.T) {
}

func TestClone(t *testing.T) {
// Test case - clone a valid vcs url with overwrite true
t.Log("Test case - clone a valid vcs url with overwrite true")
gitURL := "git+https://github.com/konveyor/move2kube.git"
repo, err := getGitRepoStruct(gitURL)
if err != nil {
t.Errorf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
t.Fatalf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
}
overwrite := true
tempPath, err := filepath.Abs(common.RemoteTempPath)
if err != nil {
t.Errorf("failed to get absolute path of %s. Error : %+v", common.RemoteTempPath, err)
tempPath := t.TempDir()
cloneDestPath := filepath.Join(tempPath, "test-clone")
var infiniteSize int64 = -1
cloneOpts := VCSCloneOptions{
CommitDepth: 1,
Overwrite: overwrite,
CloneDestinationPath: cloneDestPath,
MaxSize: infiniteSize,
}
folderName := "test-clone"
cloneOpts := VCSCloneOptions{CommitDepth: 1, Overwrite: overwrite, CloneDestinationPath: filepath.Join(tempPath, folderName)}
clonedPath, err := repo.Clone(cloneOpts)
if err != nil {
t.Errorf("failed to clone the git repo. Error : %+v", err)
t.Fatalf("failed to clone the git repo. Error : %+v", err)
}

// Test case 2 - Repository already exists with overwrite true
t.Log("Test case 2 - Repository already exists with overwrite false")
gitURL = "git+https://github.com/konveyor/move2kube.git"
repo, err = getGitRepoStruct(gitURL)
if err != nil {
t.Errorf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
t.Fatalf("failed to get git repo struct for the given git URL '%s' . Error : %+v", gitURL, err)
}
overwrite = false
tempPath, err = filepath.Abs(common.RemoteTempPath)
if err != nil {
t.Errorf("failed to get absolute path of %s. Error : %+v", common.RemoteTempPath, err)
cloneOpts = VCSCloneOptions{
CommitDepth: 1,
Overwrite: overwrite,
CloneDestinationPath: cloneDestPath,
MaxSize: infiniteSize,
}
folderName = "test-clone"
cloneOpts = VCSCloneOptions{CommitDepth: 1, Overwrite: overwrite, CloneDestinationPath: filepath.Join(tempPath, folderName)}
clonedPathWithoutOverwrite, err := repo.Clone(cloneOpts)
if err != nil {
t.Errorf("failed to clone the git repo. Error : %+v", err)
t.Fatalf("failed to clone the git repo. Error : %+v", err)
}
if clonedPath != clonedPathWithoutOverwrite {
t.Errorf("cloned paths did not match with overwrite false. cloned path %s, cloned path without overwrite: %s", clonedPath, clonedPathWithoutOverwrite)
t.Fatalf("cloned paths did not match with overwrite false. cloned path '%s', cloned path without overwrite: '%s'", clonedPath, clonedPathWithoutOverwrite)
}

}

func TestIsGitBranch(t *testing.T) {
Expand Down
Loading

0 comments on commit e88cece

Please sign in to comment.