Skip to content

Commit 5cca4e8

Browse files
committed
Leverage local links on git checkouts
This commit updates the handling of git checkouts from the database to use hardlinks if possible, speeding up this operation for large repositories significantly. As a refresher, Cargo caches git repositories in a few locations to speed up local usage of git repositories. Cargo has a "database" folder which is a bare checkout of any git repository Cargo has cached historically. This database folder contains effectively a bunch of databases for remote repos that are updated periodically. When actually building a crate Cargo will clone this database into a different location, the checkouts folder. Each rev we build (ever) is cached in the checkouts folder. This means that once a checkout directory is created it's frozen for all of time. This latter step is what this commit is optimizing: checking out the database onto the local filesystem at a particular revision. Previously we were instructing libgit2 to fall back to a "git aware" transport which was exceedingly slow on some systems for filesystem-to-filesystem transfers. The local-clone optimization (we just forgot to turn it on in libgit2) is a longstanding one and should speed this up significantly! Closes #4604
1 parent 33f08cb commit 5cca4e8

File tree

3 files changed

+69
-29
lines changed

3 files changed

+69
-29
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ failure = "0.1.1"
2828
filetime = "0.1"
2929
flate2 = "1.0"
3030
fs2 = "0.4"
31-
git2 = "0.6"
31+
git2 = "0.6.11"
3232
git2-curl = "0.7"
3333
glob = "0.2"
3434
hex = "0.3"

src/cargo/sources/git/source.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ impl<'cfg> Source for GitSource<'cfg> {
159159
let should_update = actual_rev.is_err() ||
160160
self.source_id.precise().is_none();
161161

162-
let (repo, actual_rev) = if should_update {
162+
let (db, actual_rev) = if should_update {
163163
self.config.shell().status("Updating",
164164
format!("git repository `{}`", self.remote.url()))?;
165165

@@ -175,7 +175,7 @@ impl<'cfg> Source for GitSource<'cfg> {
175175
// Don’t use the full hash,
176176
// to contribute less to reaching the path length limit on Windows:
177177
// https://github.com/servo/servo/pull/14397
178-
let short_id = repo.to_short_id(actual_rev.clone()).unwrap();
178+
let short_id = db.to_short_id(actual_rev.clone()).unwrap();
179179

180180
let checkout_path = lock.parent().join("checkouts")
181181
.join(&self.ident).join(short_id.as_str());
@@ -185,7 +185,7 @@ impl<'cfg> Source for GitSource<'cfg> {
185185
// in scope so the destructors here won't tamper with too much.
186186
// Checkout is immutable, so we don't need to protect it with a lock once
187187
// it is created.
188-
repo.copy_to(actual_rev.clone(), &checkout_path, self.config)?;
188+
db.copy_to(actual_rev.clone(), &checkout_path, self.config)?;
189189

190190
let source_id = self.source_id.with_precise(Some(actual_rev.to_string()));
191191
let path_source = PathSource::new_recursive(&checkout_path,

src/cargo/sources/git/utils.rs

+65-25
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use url::Url;
1212

1313
use core::GitReference;
1414
use util::{ToUrl, internal, Config, network, Progress};
15-
use util::errors::{CargoResult, CargoResultExt, CargoError, Internal};
15+
use util::errors::{CargoResult, CargoResultExt, Internal};
1616

1717
#[derive(PartialEq, Clone, Debug)]
1818
pub struct GitRevision(git2::Oid);
@@ -226,14 +226,43 @@ impl<'a> GitCheckout<'a> {
226226
fs::create_dir_all(&dirname).chain_err(|| {
227227
format!("Couldn't mkdir {}", dirname.display())
228228
})?;
229-
if fs::metadata(&into).is_ok() {
229+
if into.exists() {
230230
fs::remove_dir_all(into).chain_err(|| {
231231
format!("Couldn't rmdir {}", into.display())
232232
})?;
233233
}
234-
let repo = git2::Repository::init(into)?;
235-
let mut checkout = GitCheckout::new(into, database, revision, repo);
236-
checkout.fetch(config)?;
234+
235+
// we're doing a local filesystem-to-filesystem clone so there should
236+
// be no need to respect global configuration options, so pass in
237+
// an empty instance of `git2::Config` below.
238+
let git_config = git2::Config::new()?;
239+
240+
// Clone the repository, but make sure we use the "local" option in
241+
// libgit2 which will attempt to use hardlinks to set up the database.
242+
// This should speed up the clone operation quite a bit if it works.
243+
//
244+
// Note that we still use the same fetch options because while we don't
245+
// need authentication information we may want progress bars and such.
246+
let url = database.path.to_url()?;
247+
let mut repo = None;
248+
with_fetch_options(&git_config, &url, config, &mut |fopts| {
249+
let mut checkout = git2::build::CheckoutBuilder::new();
250+
checkout.dry_run(); // we'll do this below during a `reset`
251+
252+
let r = git2::build::RepoBuilder::new()
253+
// use hard links and/or copy the database, we're doing a
254+
// filesystem clone so this'll speed things up quite a bit.
255+
.clone_local(git2::build::CloneLocal::Local)
256+
.with_checkout(checkout)
257+
.fetch_options(fopts)
258+
// .remote_create(|repo, _name, url| repo.remote_anonymous(url))
259+
.clone(url.as_str(), into)?;
260+
repo = Some(r);
261+
Ok(())
262+
})?;
263+
let repo = repo.unwrap();
264+
265+
let checkout = GitCheckout::new(into, database, revision, repo);
237266
checkout.reset(config)?;
238267
Ok(checkout)
239268
}
@@ -242,7 +271,7 @@ impl<'a> GitCheckout<'a> {
242271
match self.repo.revparse_single("HEAD") {
243272
Ok(ref head) if head.id() == self.revision.0 => {
244273
// See comments in reset() for why we check this
245-
fs::metadata(self.location.join(".cargo-ok")).is_ok()
274+
self.location.join(".cargo-ok").exists()
246275
}
247276
_ => false,
248277
}
@@ -555,6 +584,33 @@ fn reset(repo: &git2::Repository,
555584
Ok(())
556585
}
557586

587+
/// Builds the `git2::FetchOptions` for talking to `url` and hands them to `cb`.
///
/// The options carry a credentials callback supplied by `with_authentication`
/// (which is given `git_config`), a transfer-progress callback that ticks a
/// "Fetch" progress bar, and `AutotagOption::All` for tag download. This
/// function does not create a remote or start a transfer itself; that is the
/// job of `cb`.
///
/// Everything runs inside `network::with_retry`, so `cb` is presumably invoked
/// more than once when an attempt fails — TODO confirm against `with_retry`
/// and `with_authentication`.
///
/// Returns whatever `network::with_retry` returns; an error coming out of
/// `with_authentication` is propagated with `?` from inside the retried closure.
pub fn with_fetch_options(git_config: &git2::Config,
588+
url: &Url,
589+
config: &Config,
590+
cb: &mut FnMut(git2::FetchOptions) -> CargoResult<()>)
591+
-> CargoResult<()>
592+
{
593+
let mut progress = Progress::new("Fetch", config);
594+
network::with_retry(config, || {
595+
with_authentication(url.as_str(), git_config, |f| {
596+
let mut rcb = git2::RemoteCallbacks::new();
597+
rcb.credentials(f);
598+
599+
// NOTE(review): the callback reports `false` when `tick` fails; per the
// git2 docs that should cancel the transfer — confirm.
rcb.transfer_progress(|stats| {
600+
progress.tick(stats.indexed_objects(), stats.total_objects()).is_ok()
601+
});
602+
603+
// Assemble the fetch options (callbacks plus tag download) and pass them
604+
// to `cb`; no remote is created here.
605+
let mut opts = git2::FetchOptions::new();
606+
opts.remote_callbacks(rcb)
607+
.download_tags(git2::AutotagOption::All);
608+
cb(opts)
609+
})?;
610+
Ok(())
611+
})
612+
}
613+
558614
pub fn fetch(repo: &mut git2::Repository,
559615
url: &Url,
560616
refspec: &str,
@@ -585,26 +641,10 @@ pub fn fetch(repo: &mut git2::Repository,
585641
maybe_gc_repo(repo)?;
586642

587643
debug!("doing a fetch for {}", url);
588-
let mut progress = Progress::new("Fetch", config);
589-
with_authentication(url.as_str(), &repo.config()?, |f| {
590-
let mut cb = git2::RemoteCallbacks::new();
591-
cb.credentials(f);
592-
593-
cb.transfer_progress(|stats| {
594-
progress.tick(stats.indexed_objects(), stats.total_objects()).is_ok()
595-
});
596-
597-
// Create a local anonymous remote in the repository to fetch the url
644+
with_fetch_options(&repo.config()?, url, config, &mut |mut opts| {
645+
debug!("initiating fetch of {} from {}", refspec, url);
598646
let mut remote = repo.remote_anonymous(url.as_str())?;
599-
let mut opts = git2::FetchOptions::new();
600-
opts.remote_callbacks(cb)
601-
.download_tags(git2::AutotagOption::All);
602-
603-
network::with_retry(config, || {
604-
debug!("initiating fetch of {} from {}", refspec, url);
605-
remote.fetch(&[refspec], Some(&mut opts), None)
606-
.map_err(CargoError::from)
607-
})?;
647+
remote.fetch(&[refspec], Some(&mut opts), None)?;
608648
Ok(())
609649
})
610650
}

0 commit comments

Comments
 (0)