Skip to content

Commit d8fa3eb

Browse files
committed
Auto merge of #4026 - alexcrichton:bare-registry, r=matklad
Don't check out the crates.io index locally

This commit moves working with the crates.io index to operating on the git object layers rather than actually literally checking out the index. This is aimed at two different goals:

* Improving the on-disk file size of the registry
* Improving cloning times for the registry as the index doesn't need to be checked out

The on-disk size of my `registry` folder of a fresh checkout of the index went from 124M to 48M, saving a good chunk of space! The entire operation took about 0.6s less on a Unix machine (out of 4.7s total for current Cargo). On Windows, however, the clone operation went from 11s to 6.7s, a much larger improvement!

Closes #4015
2 parents c00e56d + 15cc376 commit d8fa3eb

File tree

4 files changed

+177
-91
lines changed

4 files changed

+177
-91
lines changed

src/cargo/sources/registry/index.rs

Lines changed: 50 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
use std::collections::HashMap;
2-
use std::io::prelude::*;
3-
use std::fs::File;
42
use std::path::Path;
3+
use std::str;
54

65
use serde_json;
76

87
use core::dependency::{Dependency, DependencyInner, Kind};
98
use core::{SourceId, Summary, PackageId, Registry};
109
use sources::registry::{RegistryPackage, RegistryDependency, INDEX_LOCK};
10+
use sources::registry::RegistryData;
1111
use util::{CargoResult, ChainError, internal, Filesystem, Config};
12+
use util::human;
1213

1314
pub struct RegistryIndex<'cfg> {
1415
source_id: SourceId,
@@ -23,7 +24,8 @@ impl<'cfg> RegistryIndex<'cfg> {
2324
pub fn new(id: &SourceId,
2425
path: &Filesystem,
2526
config: &'cfg Config,
26-
locked: bool) -> RegistryIndex<'cfg> {
27+
locked: bool)
28+
-> RegistryIndex<'cfg> {
2729
RegistryIndex {
2830
source_id: id.clone(),
2931
path: path.clone(),
@@ -35,13 +37,16 @@ impl<'cfg> RegistryIndex<'cfg> {
3537
}
3638

3739
/// Return the hash listed for a specified PackageId.
38-
pub fn hash(&mut self, pkg: &PackageId) -> CargoResult<String> {
40+
pub fn hash(&mut self,
41+
pkg: &PackageId,
42+
load: &mut RegistryData)
43+
-> CargoResult<String> {
3944
let key = (pkg.name().to_string(), pkg.version().to_string());
4045
if let Some(s) = self.hashes.get(&key) {
4146
return Ok(s.clone())
4247
}
4348
// Ok, we're missing the key, so parse the index file to load it.
44-
self.summaries(pkg.name())?;
49+
self.summaries(pkg.name(), load)?;
4550
self.hashes.get(&key).chain_error(|| {
4651
internal(format!("no hash listed for {}", pkg))
4752
}).map(|s| s.clone())
@@ -51,20 +56,26 @@ impl<'cfg> RegistryIndex<'cfg> {
5156
///
5257
/// Returns a list of pairs of (summary, yanked) for the package name
5358
/// specified.
54-
pub fn summaries(&mut self, name: &str) -> CargoResult<&Vec<(Summary, bool)>> {
59+
pub fn summaries(&mut self,
60+
name: &str,
61+
load: &mut RegistryData)
62+
-> CargoResult<&Vec<(Summary, bool)>> {
5563
if self.cache.contains_key(name) {
5664
return Ok(&self.cache[name]);
5765
}
58-
let summaries = self.load_summaries(name)?;
66+
let summaries = self.load_summaries(name, load)?;
5967
let summaries = summaries.into_iter().filter(|summary| {
6068
summary.0.package_id().name() == name
6169
}).collect();
6270
self.cache.insert(name.to_string(), summaries);
6371
Ok(&self.cache[name])
6472
}
6573

66-
fn load_summaries(&mut self, name: &str) -> CargoResult<Vec<(Summary, bool)>> {
67-
let (path, _lock) = if self.locked {
74+
fn load_summaries(&mut self,
75+
name: &str,
76+
load: &mut RegistryData)
77+
-> CargoResult<Vec<(Summary, bool)>> {
78+
let (root, _lock) = if self.locked {
6879
let lock = self.path.open_ro(Path::new(INDEX_LOCK),
6980
self.config,
7081
"the registry index");
@@ -84,25 +95,32 @@ impl<'cfg> RegistryIndex<'cfg> {
8495

8596
// see module comment for why this is structured the way it is
8697
let path = match fs_name.len() {
87-
1 => path.join("1").join(&fs_name),
88-
2 => path.join("2").join(&fs_name),
89-
3 => path.join("3").join(&fs_name[..1]).join(&fs_name),
90-
_ => path.join(&fs_name[0..2])
91-
.join(&fs_name[2..4])
92-
.join(&fs_name),
98+
1 => format!("1/{}", fs_name),
99+
2 => format!("2/{}", fs_name),
100+
3 => format!("3/{}/{}", &fs_name[..1], fs_name),
101+
_ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
102+
// 1 => Path::new("1").join(fs_name),
103+
// 2 => Path::new("2").join(fs_name),
104+
// 3 => Path::new("3").join(&fs_name[..1]).join(fs_name),
105+
// _ => Path::new(&fs_name[0..2]).join(&fs_name[2..4]).join(fs_name),
93106
};
94-
match File::open(&path) {
95-
Ok(mut f) => {
96-
let mut contents = String::new();
97-
f.read_to_string(&mut contents)?;
98-
let ret: CargoResult<Vec<(Summary, bool)>>;
99-
ret = contents.lines().filter(|l| !l.trim().is_empty())
100-
.map(|l| self.parse_registry_package(l))
101-
.collect();
102-
ret.chain_error(|| {
103-
internal(format!("failed to parse registry's information \
104-
for: {}", name))
105-
})
107+
match load.load(&root, Path::new(&path)) {
108+
Ok(contents) => {
109+
let contents = str::from_utf8(&contents).map_err(|_| {
110+
human("registry index file was not valid utf-8")
111+
})?;
112+
let lines = contents.lines()
113+
.map(|s| s.trim())
114+
.filter(|l| !l.is_empty());
115+
116+
// Attempt forwards-compatibility on the index by ignoring
117+
// everything that we ourselves don't understand, that should
118+
// allow future cargo implementations to break the
119+
// interpretation of each line here and older cargo will simply
120+
// ignore the new lines.
121+
Ok(lines.filter_map(|line| {
122+
self.parse_registry_package(line).ok()
123+
}).collect())
106124
}
107125
Err(..) => Ok(Vec::new()),
108126
}
@@ -161,12 +179,13 @@ impl<'cfg> RegistryIndex<'cfg> {
161179
.set_kind(kind)
162180
.into_dependency())
163181
}
164-
}
165182

166-
impl<'cfg> Registry for RegistryIndex<'cfg> {
167-
fn query(&mut self, dep: &Dependency) -> CargoResult<Vec<Summary>> {
183+
pub fn query(&mut self,
184+
dep: &Dependency,
185+
load: &mut RegistryData)
186+
-> CargoResult<Vec<Summary>> {
168187
let mut summaries = {
169-
let summaries = self.summaries(dep.name())?;
188+
let summaries = self.summaries(dep.name(), load)?;
170189
summaries.iter().filter(|&&(_, yanked)| {
171190
dep.source_id().precise().is_some() || !yanked
172191
}).map(|s| s.0.clone()).collect::<Vec<_>>()
@@ -188,8 +207,4 @@ impl<'cfg> Registry for RegistryIndex<'cfg> {
188207
});
189208
summaries.query(dep)
190209
}
191-
192-
fn supports_checksums(&self) -> bool {
193-
true
194-
}
195210
}

src/cargo/sources/registry/local.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ use rustc_serialize::hex::ToHex;
66

77
use core::PackageId;
88
use sources::registry::{RegistryData, RegistryConfig};
9-
use util::{Config, CargoResult, ChainError, human, Sha256, Filesystem};
109
use util::FileLock;
10+
use util::paths;
11+
use util::{Config, CargoResult, ChainError, human, Sha256, Filesystem};
1112

1213
pub struct LocalRegistry<'cfg> {
1314
index_path: Filesystem,
@@ -34,7 +35,11 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
3435
&self.index_path
3536
}
3637

37-
fn config(&self) -> CargoResult<Option<RegistryConfig>> {
38+
fn load(&self, root: &Path, path: &Path) -> CargoResult<Vec<u8>> {
39+
paths::read_bytes(&root.join(path))
40+
}
41+
42+
fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
3843
// Local registries don't have configuration for remote APIs or anything
3944
// like that
4045
Ok(None)

src/cargo/sources/registry/mod.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ struct RegistryDependency {
219219

220220
pub trait RegistryData {
221221
fn index_path(&self) -> &Filesystem;
222-
fn config(&self) -> CargoResult<Option<RegistryConfig>>;
222+
fn load(&self, root: &Path, path: &Path) -> CargoResult<Vec<u8>>;
223+
fn config(&mut self) -> CargoResult<Option<RegistryConfig>>;
223224
fn update_index(&mut self) -> CargoResult<()>;
224225
fn download(&mut self,
225226
pkg: &PackageId,
@@ -274,7 +275,7 @@ impl<'cfg> RegistrySource<'cfg> {
274275
/// Decode the configuration stored within the registry.
275276
///
276277
/// This requires that the index has been at least checked out.
277-
pub fn config(&self) -> CargoResult<Option<RegistryConfig>> {
278+
pub fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
278279
self.ops.config()
279280
}
280281

@@ -323,12 +324,12 @@ impl<'cfg> Registry for RegistrySource<'cfg> {
323324
// come back with no summaries, then our registry may need to be
324325
// updated, so we fall back to performing a lazy update.
325326
if dep.source_id().precise().is_some() && !self.updated {
326-
if self.index.query(dep)?.is_empty() {
327+
if self.index.query(dep, &mut *self.ops)?.is_empty() {
327328
self.do_update()?;
328329
}
329330
}
330331

331-
self.index.query(dep)
332+
self.index.query(dep, &mut *self.ops)
332333
}
333334

334335
fn supports_checksums(&self) -> bool {
@@ -356,7 +357,7 @@ impl<'cfg> Source for RegistrySource<'cfg> {
356357
}
357358

358359
fn download(&mut self, package: &PackageId) -> CargoResult<Package> {
359-
let hash = self.index.hash(package)?;
360+
let hash = self.index.hash(package, &mut *self.ops)?;
360361
let path = self.ops.download(package, &hash)?;
361362
let path = self.unpack_package(package, &path).chain_error(|| {
362363
internal(format!("failed to unpack package `{}`", package))
@@ -369,7 +370,7 @@ impl<'cfg> Source for RegistrySource<'cfg> {
369370
// differ due to historical Cargo bugs. To paper over these we trash the
370371
// *summary* loaded from the Cargo.toml we just downloaded with the one
371372
// we loaded from the index.
372-
let summaries = self.index.summaries(package.name())?;
373+
let summaries = self.index.summaries(package.name(), &mut *self.ops)?;
373374
let summary = summaries.iter().map(|s| &s.0).find(|s| {
374375
s.package_id() == package
375376
}).expect("summary not found");

0 commit comments

Comments
 (0)