diff --git a/crates/cargo-test-support/src/lib.rs b/crates/cargo-test-support/src/lib.rs
index 14e8e8d27ff..0e5e9112810 100644
--- a/crates/cargo-test-support/src/lib.rs
+++ b/crates/cargo-test-support/src/lib.rs
@@ -1525,6 +1525,7 @@ fn substitute_macros(input: &str) -> String {
         ("[DOCUMENTING]", " Documenting"),
         ("[FRESH]", " Fresh"),
         ("[UPDATING]", " Updating"),
+        ("[PREFETCHING]", " Prefetching"),
         ("[ADDING]", " Adding"),
         ("[REMOVING]", " Removing"),
        ("[DOCTEST]", " Doc-tests"),
diff --git a/crates/cargo-test-support/src/registry.rs b/crates/cargo-test-support/src/registry.rs
index 4bb6f2aa43b..60fb78504a1 100644
--- a/crates/cargo-test-support/src/registry.rs
+++ b/crates/cargo-test-support/src/registry.rs
@@ -7,7 +7,12 @@ use flate2::Compression;
 use std::collections::HashMap;
 use std::fs::{self, File};
 use std::io::prelude::*;
+use std::io::BufReader;
+use std::net::{SocketAddr, TcpListener};
 use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::thread;
 use tar::{Builder, Header};
 use url::Url;

@@ -213,6 +218,165 @@ pub fn init() {
     );
 }

+pub struct RegistryServer {
+    done: Arc<AtomicBool>,
+    server: Option<thread::JoinHandle<()>>,
+    addr: SocketAddr,
+}
+
+impl RegistryServer {
+    pub fn addr(&self) -> SocketAddr {
+        self.addr
+    }
+}
+
+impl Drop for RegistryServer {
+    fn drop(&mut self) {
+        self.done.store(true, Ordering::SeqCst);
+        // NOTE: we can't actually await the server since it's blocked in accept()
+        let _ = self.server.take().unwrap();
+    }
+}
+
+#[must_use]
+pub fn serve_registry(registry_path: PathBuf) -> RegistryServer {
+    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
+    let addr = listener.local_addr().unwrap();
+    let done = Arc::new(AtomicBool::new(false));
+    let done2 = done.clone();
+
+    let t = thread::spawn(move || {
+        let mut line = String::new();
+        'server: while !done2.load(Ordering::SeqCst) {
+            let (socket, _) = listener.accept().unwrap();
+            // Let's implement a very naive static file HTTP server.
+            let mut buf = BufReader::new(socket);
+
+            // First, the request line:
+            // GET /path HTTPVERSION
+            line.clear();
+            if buf.read_line(&mut line).unwrap() == 0 {
+                // Connection terminated.
+                continue;
+            }
+
+            assert!(line.starts_with("GET "), "got non-GET request: {}", line);
+            let path = PathBuf::from(
+                line.split_whitespace()
+                    .skip(1)
+                    .next()
+                    .unwrap()
+                    .trim_start_matches('/'),
+            );
+
+            let file = registry_path.join(path);
+            if file.exists() {
+                // Grab some other headers we may care about.
+                let mut if_modified_since = None;
+                let mut if_none_match = None;
+                loop {
+                    line.clear();
+                    if buf.read_line(&mut line).unwrap() == 0 {
+                        continue 'server;
+                    }
+
+                    if line == "\r\n" {
+                        // End of headers.
+                        line.clear();
+                        break;
+                    }
+
+                    let value = line
+                        .splitn(2, ':')
+                        .skip(1)
+                        .next()
+                        .map(|v| v.trim())
+                        .unwrap();
+
+                    if line.starts_with("If-Modified-Since:") {
+                        if_modified_since = Some(value.to_owned());
+                    } else if line.starts_with("If-None-Match:") {
+                        if_none_match = Some(value.trim_matches('"').to_owned());
+                    }
+                }
+
+                // Now grab info about the file.
+                let data = fs::read(&file).unwrap();
+                let etag = Sha256::new().update(&data).finish_hex();
+                let last_modified = format!("{:?}", file.metadata().unwrap().modified().unwrap());
+
+                // Start to construct our response:
+                let mut any_match = false;
+                let mut all_match = true;
+                if let Some(expected) = if_none_match {
+                    if etag != expected {
+                        all_match = false;
+                    } else {
+                        any_match = true;
+                    }
+                }
+                if let Some(expected) = if_modified_since {
+                    // NOTE: Equality comparison is good enough for tests.
+                    if last_modified != expected {
+                        all_match = false;
+                    } else {
+                        any_match = true;
+                    }
+                }
+
+                // Write out the main response line.
+                if any_match && all_match {
+                    buf.get_mut()
+                        .write_all(b"HTTP/1.1 304 Not Modified\r\n")
+                        .unwrap();
+                } else {
+                    buf.get_mut().write_all(b"HTTP/1.1 200 OK\r\n").unwrap();
+                }
+                // TODO: Support 451 for crate index deletions.
+
+                // Write out other headers.
+                buf.get_mut()
+                    .write_all(format!("Content-Length: {}\r\n", data.len()).as_bytes())
+                    .unwrap();
+                buf.get_mut()
+                    .write_all(format!("ETag: \"{}\"\r\n", etag).as_bytes())
+                    .unwrap();
+                buf.get_mut()
+                    .write_all(format!("Last-Modified: {}\r\n", last_modified).as_bytes())
+                    .unwrap();
+
+                // And finally, write out the body.
+                buf.get_mut().write_all(b"\r\n").unwrap();
+                buf.get_mut().write_all(&data).unwrap();
+            } else {
+                loop {
+                    line.clear();
+                    if buf.read_line(&mut line).unwrap() == 0 {
+                        // Connection terminated.
+                        continue 'server;
+                    }
+
+                    if line == "\r\n" {
+                        break;
+                    }
+                }
+
+                buf.get_mut()
+                    .write_all(b"HTTP/1.1 404 Not Found\r\n\r\n")
+                    .unwrap();
+                buf.get_mut().write_all(b"\r\n").unwrap();
+            }
+            buf.get_mut().flush().unwrap();
+        }
+    });
+
+    RegistryServer {
+        addr,
+        server: Some(t),
+        done,
+    }
+}
+
 pub fn init_registry(registry_path: PathBuf, dl_url: String, api_url: Url, api_path: PathBuf) {
     // Initialize a new registry.
     repo(&registry_path)
diff --git a/crates/resolver-tests/src/lib.rs b/crates/resolver-tests/src/lib.rs
index b32dfc04330..ac2a6cb4383 100644
--- a/crates/resolver-tests/src/lib.rs
+++ b/crates/resolver-tests/src/lib.rs
@@ -1,6 +1,7 @@
 #![allow(clippy::many_single_char_names)]
 #![allow(clippy::needless_range_loop)] // false positives

+use std::borrow::Cow;
 use std::cell::RefCell;
 use std::cmp::PartialEq;
 use std::cmp::{max, min};
diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs
index ec3ff0fbe96..b804e8d9c4d 100644
--- a/src/bin/cargo/cli.rs
+++ b/src/bin/cargo/cli.rs
@@ -44,6 +44,7 @@ Available unstable (nightly-only) flags:
     -Z terminal-width      -- Provide a terminal width to rustc for error truncation
     -Z namespaced-features -- Allow features with `dep:` prefix
     -Z weak-dep-features   -- Allow `dep_name?/feature` feature syntax
+    -Z http-registry       -- Support HTTP-based crate registries

 Run with 'cargo -Z [FLAG] [SUBCOMMAND]'"
 );
diff --git a/src/cargo/core/dependency.rs b/src/cargo/core/dependency.rs
index 61795936dc2..b43276a7fba 100644
--- a/src/cargo/core/dependency.rs
+++ b/src/cargo/core/dependency.rs
@@ -60,7 +60,7 @@ struct SerializedDependency<'a> {
     target: Option<&'a Platform>,
     /// The registry URL this dependency is from.
     /// If None, then it comes from the default registry (crates.io).
-    registry: Option<&'a str>,
+    registry: Option<String>,
 }

 impl ser::Serialize for Dependency {
@@ -79,7 +79,7 @@ impl ser::Serialize for Dependency {
             features: self.features(),
             target: self.platform(),
             rename: self.explicit_name_in_toml().map(|s| s.as_str()),
-            registry: registry_id.as_ref().map(|sid| sid.url().as_str()),
+            registry: registry_id.as_ref().map(|sid| sid.as_url().to_string()),
         }
         .serialize(s)
     }
diff --git a/src/cargo/core/features.rs b/src/cargo/core/features.rs
index 487abcb5146..2a5bd3eecfd 100644
--- a/src/cargo/core/features.rs
+++ b/src/cargo/core/features.rs
@@ -361,6 +361,7 @@ pub struct CliUnstable {
     pub weak_dep_features: bool,
     pub extra_link_arg: bool,
     pub credential_process: bool,
+    pub http_registry: bool,
 }

 fn deserialize_build_std<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
@@ -470,6 +471,7 @@ impl CliUnstable {
             "weak-dep-features" => self.weak_dep_features = parse_empty(k, v)?,
             "extra-link-arg" => self.extra_link_arg = parse_empty(k, v)?,
             "credential-process" => self.credential_process = parse_empty(k, v)?,
+            "http-registry" => self.http_registry = parse_empty(k, v)?,

             _ => bail!("unknown `-Z` flag specified: {}", k),
         }
diff --git a/src/cargo/core/package.rs b/src/cargo/core/package.rs
index d0a4f847b31..1d541858085 100644
--- a/src/cargo/core/package.rs
+++ b/src/cargo/core/package.rs
@@ -8,12 +8,11 @@ use std::path::{Path, PathBuf};
 use std::rc::Rc;
 use std::time::{Duration, Instant};

-use anyhow::Context;
 use bytesize::ByteSize;
 use curl::easy::{Easy, HttpVersion};
 use curl::multi::{EasyHandle, Multi};
 use lazycell::LazyCell;
-use log::{debug, warn};
+use log::debug;
 use semver::Version;
 use serde::Serialize;

@@ -579,23 +578,6 @@ impl<'cfg> PackageSet<'cfg> {
     }
 }

-// When dynamically linked against libcurl, we want to ignore some failures
-// when using old versions that don't support certain features.
-macro_rules! try_old_curl {
-    ($e:expr, $msg:expr) => {
-        let result = $e;
-        if cfg!(target_os = "macos") {
-            if let Err(e) = result {
-                warn!("ignoring libcurl {} error: {}", $msg, e);
-            }
-        } else {
-            result.with_context(|| {
-                anyhow::format_err!("failed to enable {}, is curl not built right?", $msg)
-            })?;
-        }
-    };
-}
-
 impl<'a, 'cfg> Downloads<'a, 'cfg> {
     /// Starts to download the package for the `id` specified.
     ///
diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs
index 0380c447d39..a362df8117e 100644
--- a/src/cargo/core/registry.rs
+++ b/src/cargo/core/registry.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::collections::{HashMap, HashSet};

 use crate::core::PackageSet;
@@ -15,6 +16,14 @@ use url::Url;
 ///
 /// See also `core::Source`.
 pub trait Registry {
+    /// Give source the opportunity to batch pre-fetch dependency information.
+    fn prefetch(
+        &mut self,
+        _deps: &mut dyn Iterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()> {
+        Ok(())
+    }
+
     /// Attempt to find the packages that match a dependency request.
     fn query(
         &mut self,
@@ -482,6 +491,58 @@ https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html
 }

 impl<'cfg> Registry for PackageRegistry<'cfg> {
+    fn prefetch(&mut self, deps: &mut dyn Iterator<Item = Cow<'_, Dependency>>) -> CargoResult<()> {
+        assert!(self.patches_locked);
+
+        // We need to partition deps so that we can prefetch dependencies from different
+        // sources. Note that we do not prefetch from overrides.
+        let mut deps_per_source = HashMap::new();
+        for dep in deps {
+            // We need to check for patches, as they may tell us to look at a different source.
+            // If they do, we want to make sure we don't access the original registry
+            // unnecessarily.
+            let mut patches = Vec::<Summary>::new();
+            if let Some(extra) = self.patches.get(dep.source_id().canonical_url()) {
+                patches.extend(
+                    extra
+                        .iter()
+                        .filter(|s| dep.matches_ignoring_source(s.package_id()))
+                        .cloned(),
+                );
+            }
+
+            let source_id = if patches.len() == 1 && dep.is_locked() {
+                // Perform the prefetch from the patched-in source instead.
+                patches.remove(0).source_id()
+            } else {
+                // The code in `fn query` accesses the original source here, so we do too.
+                dep.source_id()
+            };
+
+            deps_per_source
+                .entry(source_id)
+                .or_insert_with(Vec::new)
+                .push(dep);
+        }
+
+        for (s, deps) in deps_per_source {
+            // Ensure the requested source_id is loaded
+            self.ensure_loaded(s, Kind::Normal).chain_err(|| {
+                anyhow::format_err!(
+                    "failed to load source for dependency `{}`",
+                    deps[0].package_name()
+                )
+            })?;
+
+            self.sources
+                .get_mut(s)
+                .unwrap()
+                .prefetch(&mut deps.into_iter())?;
+        }
+
+        Ok(())
+    }
+
     fn query(
         &mut self,
         dep: &Dependency,
diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs
index 094c64065b1..e8b19c23794 100644
--- a/src/cargo/core/resolver/mod.rs
+++ b/src/cargo/core/resolver/mod.rs
@@ -47,6 +47,7 @@
 //! that we're implementing something that probably shouldn't be allocating all
 //! over the place.

+use std::borrow::Cow;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::mem;
 use std::rc::Rc;
@@ -57,7 +58,7 @@ use log::{debug, trace};
 use crate::core::PackageIdSpec;
 use crate::core::{Dependency, PackageId, Registry, Summary};
 use crate::util::config::Config;
-use crate::util::errors::CargoResult;
+use crate::util::errors::{CargoResult, CargoResultExt};
 use crate::util::profile;

 use self::context::Context;
@@ -133,6 +134,18 @@ pub fn resolve(
         Some(config) => config.cli_unstable().minimal_versions,
         None => false,
     };
+
+    // First, allow the source to batch pre-fetch dependencies we may need.
+    if config.map_or(false, |c| c.cli_unstable().http_registry) {
+        registry
+            .prefetch(
+                &mut summaries
+                    .iter()
+                    .flat_map(|summary| summary.0.dependencies().iter().map(Cow::Borrowed)),
+            )
+            .chain_err(|| "failed to prefetch dependencies")?;
+    }
+
     let mut registry =
         RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions, config);
     let cx = activate_deps_loop(cx, &mut registry, summaries, config)?;
diff --git a/src/cargo/core/source/mod.rs b/src/cargo/core/source/mod.rs
index f61e9636374..ef61567fdf6 100644
--- a/src/cargo/core/source/mod.rs
+++ b/src/cargo/core/source/mod.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::collections::hash_map::HashMap;
 use std::fmt;

@@ -27,6 +28,12 @@ pub trait Source {
     /// the `precise` field in the source id listed.
     fn requires_precise(&self) -> bool;

+    /// Give source the opportunity to batch pre-fetch dependency information.
+    fn prefetch(
+        &mut self,
+        deps: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()>;
+
     /// Attempts to find the packages that match a dependency request.
     fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()>;

@@ -129,6 +136,14 @@ impl<'a, T: Source + ?Sized + 'a> Source for Box<T> {
         (**self).requires_precise()
     }

+    /// Forwards to `Source::prefetch`.
+    fn prefetch(
+        &mut self,
+        deps: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()> {
+        (**self).prefetch(deps)
+    }
+
     /// Forwards to `Source::query`.
     fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
         (**self).query(dep, f)
     }
@@ -197,6 +212,13 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T {
         (**self).requires_precise()
     }

+    fn prefetch(
+        &mut self,
+        deps: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()> {
+        (**self).prefetch(deps)
+    }
+
     fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
         (**self).query(dep, f)
     }
diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs
index 5e6322d9e9c..e91f21a9c18 100644
--- a/src/cargo/core/source/source_id.rs
+++ b/src/cargo/core/source/source_id.rs
@@ -135,6 +135,11 @@ impl SourceId {
                 Ok(SourceId::new(SourceKind::Registry, url)?
                     .with_precise(Some("locked".to_string())))
             }
+            "sparse" => {
+                let url = string.into_url()?;
+                Ok(SourceId::new(SourceKind::Registry, url)?
+                    .with_precise(Some("locked".to_string())))
+            }
             "path" => {
                 let url = url.into_url()?;
                 SourceId::new(SourceKind::Path, url)
@@ -194,8 +199,9 @@ impl SourceId {

     pub fn alt_registry(config: &Config, key: &str) -> CargoResult<SourceId> {
         let url = config.get_registry_index(key)?;
+        let (kind, url) = (SourceKind::Registry, url);
         Ok(SourceId::wrap(SourceIdInner {
-            kind: SourceKind::Registry,
+            kind,
             canonical_url: CanonicalUrl::new(&url)?,
             url,
             precise: None,
@@ -278,7 +284,7 @@ impl SourceId {
                 self,
                 yanked_whitelist,
                 config,
-            ))),
+            )?)),
             SourceKind::LocalRegistry => {
                 let path = match self.inner.url.to_file_path() {
                     Ok(p) => p,
diff --git a/src/cargo/macros.rs b/src/cargo/macros.rs
index 3ebf3b37f67..763b0b4979a 100644
--- a/src/cargo/macros.rs
+++ b/src/cargo/macros.rs
@@ -47,3 +47,21 @@ impl<T: fmt::Display> fmt::Debug for DisplayAsDebug<T> {
         fmt::Display::fmt(&self.0, f)
     }
 }
+
+// When dynamically linked against libcurl, we want to ignore some failures
+// when using old versions that don't support certain features.
+macro_rules! try_old_curl {
+    ($e:expr, $msg:expr) => {
+        let result = $e;
+        if cfg!(target_os = "macos") {
+            if let Err(e) = result {
+                log::warn!("ignoring libcurl {} error: {}", $msg, e);
+            }
+        } else {
+            use anyhow::Context;
+            result.with_context(|| {
+                anyhow::format_err!("failed to enable {}, is curl not built right?", $msg)
+            })?;
+        }
+    };
+}
diff --git a/src/cargo/ops/registry.rs b/src/cargo/ops/registry.rs
index 1d1731a7da3..089de29cfed 100644
--- a/src/cargo/ops/registry.rs
+++ b/src/cargo/ops/registry.rs
@@ -451,7 +451,7 @@ fn registry(
     }
     let api_host = {
         let _lock = config.acquire_package_cache_lock()?;
-        let mut src = RegistrySource::remote(sid, &HashSet::new(), config);
+        let mut src = RegistrySource::remote(sid, &HashSet::new(), config)?;
         // Only update the index if the config is not available or `force` is set.
         let cfg = src.config();
         let mut updated_cfg = || {
diff --git a/src/cargo/sources/directory.rs b/src/cargo/sources/directory.rs
index 3e6daf034b8..7fcee08cbfd 100644
--- a/src/cargo/sources/directory.rs
+++ b/src/cargo/sources/directory.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt::{self, Debug, Formatter};
 use std::path::{Path, PathBuf};
@@ -42,6 +43,13 @@ impl<'cfg> Debug for DirectorySource<'cfg> {
     }
 }

 impl<'cfg> Source for DirectorySource<'cfg> {
+    fn prefetch(
+        &mut self,
+        _: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()> {
+        Ok(())
+    }
+
     fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
         let packages = self.packages.values().map(|p| &p.0);
         let matches = packages.filter(|pkg| dep.matches(pkg.summary()));
diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs
index 3e66dd3cda8..13a12d84ba9 100644
--- a/src/cargo/sources/git/source.rs
+++ b/src/cargo/sources/git/source.rs
@@ -8,6 +8,7 @@ use crate::util::hex::short_hash;
 use crate::util::Config;
 use anyhow::Context;
 use log::trace;
+use std::borrow::Cow;
 use std::fmt::{self, Debug, Formatter};
 use url::Url;

@@ -83,6 +84,13 @@ impl<'cfg> Debug for GitSource<'cfg> {
     }
 }

 impl<'cfg> Source for GitSource<'cfg> {
+    fn prefetch(
+        &mut self,
+        _: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()> {
+        Ok(())
+    }
+
     fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
         let src = self
             .path_source
diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs
index 64b0f77ed5a..6c6a2607cf5 100644
--- a/src/cargo/sources/path.rs
+++ b/src/cargo/sources/path.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::fmt::{self, Debug, Formatter};
 use std::fs;
 use std::path::{Path, PathBuf};
@@ -469,6 +470,13 @@ impl<'cfg> Debug for PathSource<'cfg> {
     }
 }

 impl<'cfg> Source for PathSource<'cfg> {
+    fn prefetch(
+        &mut self,
+        _: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+    ) -> CargoResult<()> {
+        Ok(())
+    }
+
     fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
         for s in self.packages.iter().map(|p| p.summary()) {
             if dep.matches(s) {
diff --git a/src/cargo/sources/registry/download.rs b/src/cargo/sources/registry/download.rs
new file mode 100644
index 00000000000..8f375f5f384
--- /dev/null
+++ b/src/cargo/sources/registry/download.rs
@@ -0,0 +1,108 @@
+use crate::core::PackageId;
+use crate::sources::registry::make_dep_prefix;
+use crate::sources::registry::MaybeLock;
+use crate::sources::registry::{
+    RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, VERSION_TEMPLATE,
+};
+use crate::util::errors::{CargoResult, CargoResultExt};
+use crate::util::{Config, Filesystem, Sha256};
+use std::fmt::Write as FmtWrite;
+use std::fs::{self, File, OpenOptions};
+use std::io::prelude::*;
+use std::io::SeekFrom;
+use std::path::Path;
+use std::str;
+
+pub(super) fn filename(pkg: PackageId) -> String {
+    format!("{}-{}.crate", pkg.name(), pkg.version())
+}
+
+pub(super) fn download(
+    load: &mut dyn RegistryData,
+    path: &Path,
+    pkg: PackageId,
+    _checksum: &str,
+) -> CargoResult<MaybeLock> {
+    // Attempt to open a read-only copy first to avoid an exclusive write
+    // lock and also work with read-only filesystems. Note that we check the
+    // length of the file like below to handle interrupted downloads.
+    //
+    // If this fails then we fall through to the exclusive path where we may
+    // have to redownload the file.
+    if let Ok(dst) = File::open(path) {
+        let meta = dst.metadata()?;
+        if meta.len() > 0 {
+            return Ok(MaybeLock::Ready(dst));
+        }
+    }
+
+    let config = load.config()?.unwrap();
+    let mut url = config.dl;
+    if !url.contains(CRATE_TEMPLATE)
+        && !url.contains(VERSION_TEMPLATE)
+        && !url.contains(PREFIX_TEMPLATE)
+        && !url.contains(LOWER_PREFIX_TEMPLATE)
+    {
+        write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap();
+    }
+    let prefix = make_dep_prefix(&*pkg.name());
+    let url = url
+        .replace(CRATE_TEMPLATE, &*pkg.name())
+        .replace(VERSION_TEMPLATE, &pkg.version().to_string())
+        .replace(PREFIX_TEMPLATE, &prefix)
+        .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase());
+
+    Ok(MaybeLock::Download {
+        url,
+        descriptor: pkg.to_string(),
+    })
+}
+
+pub(super) fn finish_download(
+    cache_path: &Filesystem,
+    config: &Config,
+    pkg: PackageId,
+    checksum: &str,
+    data: &[u8],
+) -> CargoResult<File> {
+    // Verify what we just downloaded
+    let actual = Sha256::new().update(data).finish_hex();
+    if actual != checksum {
+        anyhow::bail!("failed to verify the checksum of `{}`", pkg)
+    }
+
+    let filename = filename(pkg);
+    cache_path.create_dir()?;
+    let path = cache_path.join(&filename);
+    let path = config.assert_package_cache_locked(&path);
+    let mut dst = OpenOptions::new()
+        .create(true)
+        .read(true)
+        .write(true)
+        .open(&path)
+        .chain_err(|| format!("failed to open `{}`", path.display()))?;
+    let meta = dst.metadata()?;
+    if meta.len() > 0 {
+        return Ok(dst);
+    }
+
+    dst.write_all(data)?;
+    dst.seek(SeekFrom::Start(0))?;
+    Ok(dst)
+}
+
+pub(super) fn is_crate_downloaded(
+    cache_path: &Filesystem,
+    config: &Config,
+    pkg: PackageId,
+) -> bool {
+    let filename = format!("{}-{}.crate", pkg.name(), pkg.version());
+    let path = Path::new(&filename);
+
+    let path = cache_path.join(path);
+    let path = config.assert_package_cache_locked(&path);
+    if let Ok(meta) = fs::metadata(path) {
+        return meta.len() > 0;
+    }
+    false
+}
diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs
new file mode 100644
index 00000000000..e729bbaa53a
--- /dev/null
+++ b/src/cargo/sources/registry/http_remote.rs
@@ -0,0 +1,1161 @@
+//! Access to an HTTP-based crate registry.
+//!
+//! See [`HttpRegistry`] for details.
+
+use crate::core::{PackageId, SourceId};
+use crate::ops;
+use crate::sources::registry::download;
+use crate::sources::registry::MaybeLock;
+use crate::sources::registry::{Fetched, RegistryConfig, RegistryData};
+use crate::util::errors::{CargoResult, CargoResultExt};
+use crate::util::interning::InternedString;
+use crate::util::paths;
+use crate::util::{self, Config, Filesystem, IntoUrl, Progress, ProgressStyle};
+use bytesize::ByteSize;
+use curl::easy::{Easy, HttpVersion, List};
+use curl::multi::{EasyHandle, Multi};
+use log::{debug, trace};
+use std::cell::{Cell, RefCell, RefMut};
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::fs::File;
+use std::io::prelude::*;
+use std::path::{Path, PathBuf};
+use std::str;
+use std::time::Duration;
+use std::time::Instant;
+use url::Url;
+
+const ETAG: &'static [u8] = b"ETag";
+const LAST_MODIFIED: &'static [u8] = b"Last-Modified";
+
+/// A registry served by the HTTP-based registry API.
+///
+/// This type is primarily accessed through the [`RegistryData`] trait.
+///
+/// `HttpRegistry` implements the HTTP-based registry API outlined in [RFC XXX].
+/// Read the RFC for
+/// the complete protocol, but _roughly_ the implementation loads each index file (e.g.,
+/// config.json or re/ge/regex) from an HTTP service rather than from a locally cloned git
+/// repository. The remote service can more or less be a static file server that simply serves the
+/// contents of the origin git repository.
+///
+/// Implemented naively, this leads to a significant amount of network traffic, as a lookup of any
+/// index file would need to check with the remote backend if the index file has changed. This
+/// cost is somewhat mitigated by the use of HTTP conditional fetches (`If-Modified-Since` and
+/// `If-None-Match` for `ETag`s) which can be efficiently handled by HTTP/2.
+///
+/// In order to take advantage of HTTP/2's ability to efficiently send multiple concurrent HTTP
+/// requests over a single connection, `HttpRegistry` supports asynchronous prefetching. The caller
+/// queues up a number of index files they are likely to want to access, and
+/// `HttpRegistry` fires off requests for each one without synchronously waiting for the response.
+/// The caller then drives the processing of the responses, which update the index files that are
+/// stored on disk, before moving on to the _actual_ dependency resolution. See
+/// [`RegistryIndex::prefetch`] for more details.
+///
+/// [RFC XXX]: https://github.com/rust-lang/rfcs/pull/2789
+pub struct HttpRegistry<'cfg> {
+    index_path: Filesystem,
+    cache_path: Filesystem,
+    source_id: SourceId,
+    config: &'cfg Config,
+
+    /// Store the server URL without the protocol prefix (sparse+)
+    url: Url,
+
+    /// Cached HTTP handle for synchronous requests (RegistryData::load).
+    http: RefCell<Option<Easy>>,
+
+    /// HTTP multi-handle for asynchronous/parallel requests during prefetching.
+    prefetch: Multi,
+
+    /// Has the client requested a cache update?
+    ///
+    /// Only if they have do we double-check the freshness of each locally-stored index file.
+    requested_update: bool,
+
+    /// State for currently pending prefetch downloads.
+    downloads: Downloads<'cfg>,
+
+    /// Does the config say that we can use HTTP multiplexing?
+    multiplexing: bool,
+
+    /// What paths have we already fetched since the last index update?
+    ///
+    /// We do not need to double-check any of these index files since we have already done so.
+    fresh: HashSet<PathBuf>,
+
+    /// If we are currently prefetching, all calls to RegistryData::load should go to disk.
+    is_prefetching: bool,
+}
+
+// NOTE: the download bits are lifted from src/cargo/core/package.rs and tweaked
+
+/// Helper for downloading index files.
+pub struct Downloads<'cfg> {
+    config: &'cfg Config,
+    /// When a download is started, it is added to this map. The key is a
+    /// "token" (see `Download::token`). It is removed once the download is
+    /// finished.
+    pending: HashMap<usize, (Download, EasyHandle)>,
+    /// Set of paths currently being downloaded, mapped to their tokens.
+    /// This should stay in sync with `pending`.
+    pending_ids: HashMap<PathBuf, usize>,
+    /// The final result of each download. A pair `(token, result)`. This is a
+    /// temporary holding area, needed because curl can report multiple
+    /// downloads at once, but the main loop (`wait`) is written to only
+    /// handle one at a time.
+    results: Vec<(usize, Result<(), curl::Error>)>,
+    /// Prefetch requests that we already have a response to.
+    /// NOTE: Should this maybe be some kind of heap?
+    eager: BTreeMap<PathBuf, Fetched>,
+    /// The next ID to use for creating a token (see `Download::token`).
+    next: usize,
+    /// Progress bar.
+    progress: RefCell<Option<Progress<'cfg>>>,
+    /// Number of downloads that have successfully finished.
+    downloads_finished: usize,
+    /// Total bytes for all successfully downloaded index files.
+    downloaded_bytes: u64,
+    /// Time when downloading started.
+    start: Instant,
+    /// Indicates *all* downloads were successful.
+    success: bool,
+}
+
+struct Download {
+    /// The token for this download, used as the key of the `Downloads::pending` map
+    /// and stored in `EasyHandle` as well.
+    token: usize,
+
+    /// The path of the package that we're downloading.
+    path: PathBuf,
+
+    /// The name of the package that we're downloading.
+    name: InternedString,
+
+    /// The version requirements for the dependency line that triggered this fetch.
+    // NOTE: we can get rid of the HashSet (and other complexity) if we had VersionReq::union
+    reqs: HashSet<semver::VersionReq>,
+
+    /// True if this download is for a transitive dependency (rather than a direct
+    /// dependency of the root crate).
+    is_transitive: bool,
+
+    /// Actual downloaded data, updated throughout the lifetime of this download.
+    data: RefCell<Vec<u8>>,
+
+    /// ETag and Last-Modified headers received from the server (if any).
+    etag: RefCell<Option<String>>,
+    last_modified: RefCell<Option<String>>,
+
+    /// Statistics updated from the progress callback in libcurl.
+    total: Cell<u64>,
+    current: Cell<u64>,
+}
+
+impl<'cfg> HttpRegistry<'cfg> {
+    pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> HttpRegistry<'cfg> {
+        let url = source_id
+            .url()
+            .to_string()
+            .trim_start_matches("sparse+")
+            .into_url()
+            .expect("a url with the protocol stripped should still be valid");
+
+        HttpRegistry {
+            index_path: config.registry_index_path().join(name),
+            cache_path: config.registry_cache_path().join(name),
+            source_id,
+            config,
+            url,
+            http: RefCell::new(None),
+            prefetch: Multi::new(),
+            multiplexing: false,
+            downloads: Downloads {
+                start: Instant::now(),
+                config,
+                next: 0,
+                pending: HashMap::new(),
+                pending_ids: HashMap::new(),
+                eager: BTreeMap::new(),
+                results: Vec::new(),
+                progress: RefCell::new(Some(Progress::with_style(
+                    "Prefetching",
+                    ProgressStyle::Ratio,
+                    config,
+                ))),
+                downloads_finished: 0,
+                downloaded_bytes: 0,
+                success: false,
+            },
+            fresh: HashSet::new(),
+            requested_update: false,
+            is_prefetching: false,
+        }
+    }
+
+    fn http(&self) -> CargoResult<RefMut<'_, Easy>> {
+        let handle = if let Ok(h) = self.http.try_borrow_mut() {
+            h
+        } else {
+            anyhow::bail!("concurrent index downloads are not yet supported");
+        };
+
+        if handle.is_none() {
+            assert!(self.config.offline());
+            anyhow::bail!("can't access remote index: you are in offline mode (--offline)");
+        } else {
+            Ok(RefMut::map(handle, |opt| {
+                opt.as_mut().expect("!handle.is_none() implies Some")
+            }))
+        }
+    }
+
+    fn handle_http_header(buf: &[u8]) -> Option<(&[u8], &str)> {
+        if buf.is_empty() {
+            return None;
+        }
+
+        let mut parts = buf.splitn(2, |&c| c == b':');
+        let tag = parts.next().expect("first item of split is always Some");
+        let rest = parts.next()?;
+        let rest = std::str::from_utf8(rest).ok()?;
+        let rest = rest.trim();
+
+        // Don't let server sneak extra lines anywhere.
+        if rest.contains('\n') {
+            return None;
+        }
+
+        Some((tag, rest))
+    }
+}
+
+const LAST_UPDATED_FILE: &str = ".last-updated";
+
+impl<'cfg> RegistryData for HttpRegistry<'cfg> {
+    fn prepare(&self) -> CargoResult<()> {
+        if self.config.offline() {
+            return Ok(());
+        }
+
+        let mut http = if let Ok(h) = self.http.try_borrow_mut() {
+            h
+        } else {
+            anyhow::bail!("concurrent index downloads are not yet supported");
+        };
+
+        if http.is_none() {
+            // NOTE: lifted from src/cargo/core/package.rs
+            //
+            // Ensure that we'll actually be able to acquire an HTTP handle later on
+            // once we start trying to download crates. This will weed out any
+            // problems with `.cargo/config` configuration related to HTTP.
+            //
+            // This way if there's a problem the error gets printed before we even
+            // hit the index, which may not actually read this configuration.
+            let mut handle = ops::http_handle(&self.config)?;
+            handle.get(true)?;
+            handle.follow_location(true)?;
+
+            // NOTE: lifted from src/cargo/core/package.rs
+            //
+            // This is an option to `libcurl` which indicates that if there's a
+            // bunch of parallel requests to the same host they all wait until the
+            // pipelining status of the host is known. This means that we won't
+            // initiate dozens of connections to crates.io, but rather only one.
+            // Once the main one is opened we realized that pipelining is possible
+            // and multiplexing is possible with static.crates.io. All in all this
+            // reduces the number of connections done to a more manageable state.
+            try_old_curl!(handle.pipewait(true), "pipewait");
+            *http = Some(handle);
+        }
+
+        Ok(())
+    }
+
+    fn start_prefetch(&mut self) -> CargoResult<bool> {
+        // NOTE: lifted from src/cargo/core/package.rs
+        //
+        // We've enabled the `http2` feature of `curl` in Cargo, so treat
+        // failures here as fatal as it would indicate a build-time problem.
+        //
+        // Note that the multiplexing support is pretty new so we're having it
+        // off-by-default temporarily.
+        //
+        // Also note that pipelining is disabled as curl authors have indicated
+        // that it's buggy, and we've empirically seen that it's buggy with HTTP
+        // proxies.
+        self.multiplexing = self.config.http_config()?.multiplexing.unwrap_or(true);
+
+        self.prefetch
+            .pipelining(false, self.multiplexing)
+            .chain_err(|| "failed to enable multiplexing/pipelining in curl")?;
+
+        // let's not flood crates.io with connections
+        self.prefetch.set_max_host_connections(2)?;
+
+        self.is_prefetching = true;
+        Ok(true)
+    }
+
+    fn prefetch(
+        &mut self,
+        root: &Path,
+        path: &Path,
+        name: InternedString,
+        req: Option<&semver::VersionReq>,
+        is_transitive: bool,
+    ) -> CargoResult<()> {
+        // A quick overview of what goes on below:
+        //
+        // We first check if we have a local copy of the given index file.
+        //
+        // If we don't have a local copy of the index file, we obviously need to fetch it from the
+        // server.
+        //
+        // If we do, we may need to check with the server if the index file has changed upstream.
+        // This happens if cargo has explicitly requested that we fetch the _latest_ versions of
+        // dependencies. We do this with a conditional HTTP request, using the `Last-Modified` and
+        // `ETag` headers we got when we fetched the currently cached index file (those headers are
+        // stored in the first two lines of each index file). That way, if nothing has changed
+        // (likely the common case), the server doesn't have to send us any data, just a 304 Not
+        // Modified.
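+        //
+        // For reference, this means a locally cached index file looks roughly like the
+        // following (a sketch only; the exact validator values are whatever the server
+        // sent, and an empty line stands in for a header the server omitted):
+        //
+        //     "some-etag-value"
+        //     Thu, 01 Jan 2022 00:00:00 GMT
+        //     {"name":"foo","vers":"0.1.0",...}
+        //     {"name":"foo","vers":"0.2.0",...}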
+
+        let pkg = root.join(path);
+        let bytes;
+        // TODO: Can we avoid this file-system interaction if we're already downloading?
+        let was = if pkg.exists() {
+            if !self.requested_update || self.fresh.contains(path) {
+                let req = if let Some(req) = req {
+                    req
+                } else {
+                    // We don't need to fetch this file, and the caller does not care about it,
+                    // so we can just return.
+                    return Ok(());
+                };
+
+                trace!("not prefetching fresh {}", name);
+
+                // We already have this file locally, and we don't need to double-check it with
+                // upstream because the client hasn't requested an index update. So there's really
+                // nothing to prefetch. We do keep track of the request though so that we will
+                // eventually yield this back to the caller who may then want to prefetch other
+                // transitive dependencies.
+                use std::collections::btree_map::Entry;
+                match self.downloads.eager.entry(path.to_path_buf()) {
+                    Entry::Occupied(mut o) => {
+                        o.get_mut().reqs.insert(req.clone());
+                        // We trust a signal that something is _not_ transitive
+                        // more than a signal that it is transitive.
+                        o.get_mut().is_transitive &= is_transitive;
+                    }
+                    Entry::Vacant(v) => {
+                        if self.fresh.contains(path) {
+                            debug!("yielding already-prefetched {}", name);
+                        }
+                        let mut reqs = HashSet::new();
+                        reqs.insert(req.clone());
+                        v.insert(Fetched {
+                            path: path.to_path_buf(),
+                            name,
+                            reqs,
+                            is_transitive,
+                        });
+                    }
+                }
+                return Ok(());
+            }
+
+            // We have a local copy that we need to double-check the contents of.
+            // First, extract the `Last-Modified` and `Etag` headers.
+            trace!("prefetch load {} from disk", path.display());
+            bytes = paths::read_bytes(&pkg)?;
+            let mut lines = bytes.splitn(3, |&c| c == b'\n');
+            let etag = lines.next().expect("splitn always returns >=1 item");
+            let last_modified = if let Some(lm) = lines.next() {
+                lm
+            } else {
+                anyhow::bail!("index file is missing an HTTP header");
+            };
+            let rest = if let Some(rest) = lines.next() {
+                rest
+            } else {
+                anyhow::bail!("index file is missing an HTTP header");
+            };
+
+            assert!(!self.config.offline());
+
+            let etag = std::str::from_utf8(etag)?;
+            let last_modified = std::str::from_utf8(last_modified)?;
+            Some((etag, last_modified, rest))
+        } else {
+            None
+        };
+
+        // If the path is already being fetched, don't fetch it again.
+        // Just note down the version requirement and move on.
+        if let Some(token) = self.downloads.pending_ids.get(path) {
+            let (dl, _) = self
+                .downloads
+                .pending
+                .get_mut(token)
+                .expect("invalid token");
+
+            trace!("amending dependency that we're already fetching: {}", name);
+            if let Some(req) = req {
+                trace!("adding req {}", req);
+                dl.reqs.insert(req.clone());
+            }
+            dl.is_transitive &= is_transitive;
+
+            return Ok(());
+        } else if self.fresh.contains(path) {
+            // This must have been a 404 when we initially prefetched it.
+            return Ok(());
+        } else if let Some(f) = self.downloads.eager.get_mut(path) {
+            // We can't hit this case.
+            // The index file must exist for the path to be in `eager`,
+            // but since that's the case, we should have caught this
+            // in the eager check _in_ the pkg.exists() path.
+            unreachable!(
+                "index file `{}` is in eager, but file doesn't exist",
+                f.path.display()
+            );
+        }
+
+        if was.is_some() {
+            debug!("double-checking freshness of {}", path.display());
+        }
+
+        // Looks like we're going to have to bite the bullet and do a network request.
+        self.prepare()?;
+
+        let mut handle = ops::http_handle(self.config)?;
+        debug!("prefetch {}{}", self.url, path.display());
+        handle.get(true)?;
+        handle.url(&format!("{}{}", self.url, path.display()))?;
+        handle.follow_location(true)?;
+
+        // Enable HTTP/2 if possible.
+        if self.multiplexing {
+            try_old_curl!(handle.http_version(HttpVersion::V2), "HTTP2");
+        } else {
+            handle.http_version(HttpVersion::V11)?;
+        }
+
+        // This is an option to `libcurl` which indicates that if there's a
+        // bunch of parallel requests to the same host they all wait until the
+        // pipelining status of the host is known. This means that we won't
+        // initiate dozens of connections to crates.io, but rather only one.
+        // Once the main one is opened we realized that pipelining is possible
+        // and multiplexing is possible with static.crates.io. All in all this
+        // reduces the number of connections done to a more manageable state.
+        try_old_curl!(handle.pipewait(true), "pipewait");
+
+        // Make sure we don't send data back if it's the same as we have in the index.
+        if let Some((ref etag, ref last_modified, _)) = was {
+            let mut list = List::new();
+            if !etag.is_empty() {
+                list.append(&format!("If-None-Match: {}", etag))?;
+            }
+            if !last_modified.is_empty() {
+                list.append(&format!("If-Modified-Since: {}", last_modified))?;
+            }
+            handle.http_headers(list)?;
+        }
+
+        // We're going to have a bunch of downloads all happening "at the same time".
+        // So, we need some way to track what headers/data/responses are for which request.
+        // We do that through this token. Each request (and associated response) gets one.
+        let token = self.downloads.next;
+        self.downloads.next += 1;
+        debug!("downloading {} as {}", path.display(), token);
+        assert_eq!(
+            self.downloads.pending_ids.insert(path.to_path_buf(), token),
+            None,
+            "path queued for download more than once"
+        );
+        let mut reqs = HashSet::new();
+        if let Some(req) = req {
+            reqs.insert(req.clone());
+        }
+
+        // Each write should go to self.downloads.pending[&token].data.
+        // Since the write function must be 'static, we access downloads through a thread-local.
+        // That thread-local is set up in `next_prefetched` when it calls self.prefetch.perform,
+        // which is what ultimately calls this method.
+        handle.write_function(move |buf| {
+            // trace!("{} - {} bytes of data", token, buf.len());
+            tls::with(|downloads| {
+                if let Some(downloads) = downloads {
+                    downloads.pending[&token]
+                        .0
+                        .data
+                        .borrow_mut()
+                        .extend_from_slice(buf);
+                }
+            });
+            Ok(buf.len())
+        })?;
+
+        // Same goes for the progress function -- it goes through thread-local storage.
+        handle.progress(true)?;
+        handle.progress_function(move |dl_total, dl_cur, _, _| {
+            tls::with(|downloads| match downloads {
+                Some(d) => d.progress(token, dl_total as u64, dl_cur as u64),
+                None => false,
+            })
+        })?;
+
+        // And ditto for the header function.
+        handle.header_function(move |buf| {
+            if let Some((tag, value)) = Self::handle_http_header(buf) {
+                let is_etag = tag.eq_ignore_ascii_case(ETAG);
+                let is_lm = tag.eq_ignore_ascii_case(LAST_MODIFIED);
+                if is_etag || is_lm {
+                    debug!(
+                        "{} - got header {}: {}",
+                        token,
+                        std::str::from_utf8(tag)
+                            .expect("both ETAG and LAST_MODIFIED are valid strs"),
+                        value
+                    );
+
+                    // Append a new line to each so we can easily prepend to the index file.
+                    let mut s = String::with_capacity(value.len() + 1);
+                    s.push_str(value);
+                    s.push('\n');
+                    tls::with(|downloads| {
+                        if let Some(downloads) = downloads {
+                            let into = if is_etag {
+                                &downloads.pending[&token].0.etag
+                            } else {
+                                &downloads.pending[&token].0.last_modified
+                            };
+                            *into.borrow_mut() = Some(s);
+                        }
+                    })
+                }
+            }
+
+            true
+        })?;
+
+        // If the progress bar isn't enabled then it may be a while before the
+        // first index file finishes downloading so we inform immediately that
+        // we're prefetching here.
+        if self.downloads.downloads_finished == 0
+            && self.downloads.pending.is_empty()
+            && !self
+                .downloads
+                .progress
+                .borrow()
+                .as_ref()
+                .unwrap()
+                .is_enabled()
+        {
+            self.downloads
+                .config
+                .shell()
+                .status("Prefetching", "index files ...")?;
+        }
+
+        let dl = Download {
+            token,
+            data: RefCell::new(Vec::new()),
+            path: path.to_path_buf(),
+            name,
+            reqs,
+            is_transitive,
+            etag: RefCell::new(None),
+            last_modified: RefCell::new(None),
+            total: Cell::new(0),
+            current: Cell::new(0),
+        };
+
+        // Finally add the request we've lined up to the pool of requests that cURL manages.
+        let mut handle = self.prefetch.add(handle)?;
+        handle.set_token(token)?;
+        self.downloads.pending.insert(dl.token, (dl, handle));
+        self.downloads.tick(WhyTick::DownloadStarted)?;
+
+        Ok(())
+    }
+
+    fn next_prefetched(&mut self) -> CargoResult<Option<Fetched>> {
+        while !self.downloads.pending.is_empty() || !self.downloads.eager.is_empty() {
+            // We may already have packages that are ready to go. This takes care of grabbing the
+            // next of those, while ensuring that we yield every distinct version requirement for
+            // each package.
+            //
+            // TODO: Use the nightly BTreeMap::pop_first when stable.
+            if let Some(path) = self.downloads.eager.keys().next().cloned() {
+                let fetched = self.downloads.eager.remove(&path).unwrap();
+
+                if fetched.reqs.is_empty() {
+                    // This index file was proactively fetched even though it did not appear as a
+                    // dependency, so we should not yield it back for future exploration.
+                    trace!(
+                        "not yielding fetch result for {} with no requirements",
+                        fetched.name
+                    );
+                    continue;
+                }
+                trace!("yielding fetch result for {}", fetched.name);
+                return Ok(Some(fetched));
+            }
+
+            // We don't have any fetched results immediately ready to be yielded,
+            // so we need to check if curl has made any progress.
+            assert_eq!(
+                self.downloads.pending.len(),
+                self.downloads.pending_ids.len()
+            );
+            // Note the `tls::set` here which sets up the thread-local storage needed to access
+            // self.downloads from `write_function` and `header_function` above.
+            let _remaining_in_multi = tls::set(&self.downloads, || {
+                self.prefetch
+                    .perform()
+                    .chain_err(|| "failed to perform http requests")
+            })?;
+            // trace!("handles remaining: {}", _remaining_in_multi);
+
+            // Walk all the messages cURL came across in case anything completed.
+            let results = &mut self.downloads.results;
+            let pending = &self.downloads.pending;
+            self.prefetch.messages(|msg| {
+                let token = msg.token().expect("failed to read token");
+                let handle = &pending[&token].1;
+                if let Some(result) = msg.result_for(handle) {
+                    results.push((token, result));
+                } else {
+                    debug!("message without a result (?)");
+                }
+            });
+
+            // Walk all the requests that completed and handle their responses.
+            //
+            // This will ultimately add more replies to self.downloads.eager, which we'll yield as
+            // we continue around the outer loop.
+            while let Some((token, result)) = self.downloads.results.pop() {
+                trace!("{} finished with {:?}", token, result);
+
+                let (dl, handle) = self
+                    .downloads
+                    .pending
+                    .remove(&token)
+                    .expect("got a token for a non-in-progress transfer");
+
+                let data = dl.data.into_inner();
+                let mut handle = self.prefetch.remove(handle)?;
+                self.downloads.pending_ids.remove(&dl.path);
+
+                let fetched = Fetched {
+                    path: dl.path,
+                    name: dl.name,
+                    reqs: dl.reqs,
+                    is_transitive: dl.is_transitive,
+                };
+                assert!(
+                    self.fresh.insert(fetched.path.clone()),
+                    "downloaded the index file `{}` twice during prefetching",
+                    fetched.path.display(),
+                );
+
+                let code = handle.response_code()?;
+                debug!(
+                    "index file for {} downloaded with status code {}",
+                    fetched.name, code
+                );
+
+                // This gets really noisy very quickly:
+                // self.config.shell().status("Prefetched", &fetched.name)?;
+
+                self.downloads.downloads_finished += 1;
+                self.downloads.downloaded_bytes += dl.total.get();
+                self.downloads.tick(WhyTick::DownloadFinished)?;
+
+                match code {
+                    200 => {
+                        // We got data back, hooray!
+                        // Let's update the index file.
+                        let path = self.config.assert_package_cache_locked(&self.index_path);
+                        let pkg = path.join(&fetched.path);
+                        paths::create_dir_all(pkg.parent().expect("pkg is a file"))?;
+                        let mut file = paths::create(pkg)?;
+                        file.write_all(dl.etag.into_inner().as_deref().unwrap_or("\n").as_bytes())?;
+                        file.write_all(
+                            dl.last_modified
+                                .into_inner()
+                                .as_deref()
+                                .unwrap_or("\n")
+                                .as_bytes(),
+                        )?;
+                        file.write_all(&data)?;
+                        file.flush()?;
+
+                        assert!(
+                            self.downloads
+                                .eager
+                                .insert(fetched.path.clone(), fetched)
+                                .is_none(),
+                            "download finished for already-finished path"
+                        );
+                    }
+                    304 => {
+                        // Not Modified response.
+                        // There's nothing for us to do -- the index file is up to date.
+                        // The only thing that matters is telling the caller about this package.
+                        assert!(
+                            self.downloads
+                                .eager
+                                .insert(fetched.path.clone(), fetched)
+                                .is_none(),
+                            "download finished for already-finished path"
+                        );
+                    }
+                    403 | 404 | 410 | 451 => {
+                        // Variants of a Not Found response.
+                        //
+                        // We treat Forbidden as just being another expression for 404 from a
+                        // server that does not want to reveal file names.
+                        //
+                        // We treat Gone and Unavailable for Legal Reasons as equivalent to 404,
+                        // since they still mean that the crate isn't there.
+                        //
+                        // Since the crate doesn't exist, we simply do not yield it. We also remove
+                        // the index file if it exists. Errors will eventually be yielded by
+                        // load().
+                        let path = self.config.assert_package_cache_locked(&self.index_path);
+                        let pkg = path.join(&fetched.path);
+                        if pkg.exists() {
+                            paths::remove_file(pkg)?;
+                        }
+                    }
+                    code => {
+                        anyhow::bail!(
+                            "prefetch: server returned unexpected HTTP status code {} for {}{}: {}",
+                            code,
+                            self.url,
+                            fetched.path.display(),
+                            String::from_utf8_lossy(&data)
+                                .lines()
+                                .next()
+                                .expect("there is always a first line"),
+                        );
+                    }
+                }
+            }
+
+            if !self.downloads.eager.is_empty() {
+                continue;
+            }
+
+            if self.downloads.pending.is_empty() {
+                // We're all done!
+                break;
+            }
+
+            // We have no more replies to provide the caller with,
+            // so we need to wait until cURL has something new for us.
+            let timeout = self
+                .prefetch
+                .get_timeout()?
+                .unwrap_or_else(|| Duration::new(5, 0));
+            self.prefetch
+                .wait(&mut [], timeout)
+                .chain_err(|| "failed to wait on curl `Multi`")?;
+        }
+
+        debug!("prefetched all transitive dependencies");
+        self.is_prefetching = false;
+        Ok(None)
+    }
+
+    fn index_path(&self) -> &Filesystem {
+        // NOTE: I'm pretty sure this method is unnecessary.
+        // The only place it is used is to set `.path` in `RegistryIndex`,
+        // which only uses it to call `assert_index_locked` below...
+        &self.index_path
+    }
+
+    fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path {
+        self.config.assert_package_cache_locked(path)
+    }
+
+    fn current_version(&self) -> Option<InternedString> {
+        // TODO: Can we use the time of the last call to update_index here?
+        None
+    }
+
+    fn update_index_file(&mut self, root: &Path, path: &Path) -> CargoResult<bool> {
+        let pkg = root.join(path);
+        if pkg.exists() {
+            paths::remove_file(&pkg)?;
+        }
+        // Also reset self.fresh so we don't hit an assertion failure if we re-download.
+        self.fresh.remove(path);
+        Ok(true)
+    }
+
+    fn load(
+        &mut self,
+        root: &Path,
+        path: &Path,
+        data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
+    ) -> CargoResult<()> {
+        // NOTE: This is pretty much a synchronous version of the prefetch() + next_prefetched()
+        // dance. Much of the code is sort-of duplicated, which isn't great, but it's moderately
+        // straightforward and works. When the real resolver supports a load returning "not yet",
+        // load and prefetch can be merged.
+
+        let pkg = root.join(path);
+        let bytes;
+        let was = if pkg.exists() {
+            // We have a local copy -- extract the `Last-Modified` and `Etag` headers.
+            trace!("load {} from disk", path.display());
+
+            bytes = paths::read_bytes(&pkg)?;
+            let mut lines = bytes.splitn(3, |&c| c == b'\n');
+            let etag = lines.next().expect("splitn always returns >=1 item");
+            let last_modified = if let Some(lm) = lines.next() {
+                lm
+            } else {
+                anyhow::bail!("index file is missing an HTTP header");
+            };
+            let rest = if let Some(rest) = lines.next() {
+                rest
+            } else {
+                anyhow::bail!("index file is missing an HTTP header");
+            };
+
+            let is_fresh = if !self.requested_update {
+                trace!(
+                    "using local {} as user did not request update",
+                    path.display()
+                );
+                true
+            } else if self.config.offline() {
+                trace!("using local {} in offline mode", path.display());
+                true
+            } else if self.is_prefetching {
+                trace!("using local {} in load while prefetching", path.display());
+                true
+            } else if self.fresh.contains(path) {
+                trace!(
+                    "using local {} as it was already prefetched",
+                    path.display()
+                );
+                true
+            } else {
+                debug!("double-checking freshness of {}", path.display());
+                false
+            };
+
+            if is_fresh {
+                return data(rest);
+            } else {
+                // We cannot trust the index files and need to double-check with server.
+                let etag = std::str::from_utf8(etag)?;
+                let last_modified = std::str::from_utf8(last_modified)?;
+                Some((etag, last_modified, rest))
+            }
+        } else if self.fresh.contains(path) {
+            // This must have been a 404.
+            anyhow::bail!("crate does not exist in the registry");
+        } else {
+            assert!(!self.is_prefetching);
+            None
+        };
+
+        if self.config.offline() {
+            anyhow::bail!(
+                "can't download index file from '{}': you are in offline mode (--offline)",
+                self.url
+            );
+        }
+
+        self.prepare()?;
+        let mut handle = self.http()?;
+        debug!("fetch {}{}", self.url, path.display());
+        handle.url(&format!("{}{}", self.url, path.display()))?;
+
+        if let Some((ref etag, ref last_modified, _)) = was {
+            let mut list = List::new();
+            list.append(&format!("If-None-Match: {}", etag))?;
+            list.append(&format!("If-Modified-Since: {}", last_modified))?;
+            handle.http_headers(list)?;
+        }
+
+        let mut contents = Vec::new();
+        let mut etag = None;
+        let mut last_modified = None;
+        let mut transfer = handle.transfer();
+        transfer.write_function(|buf| {
+            contents.extend_from_slice(buf);
+            Ok(buf.len())
+        })?;
+
+        // Capture ETag and Last-Modified.
+        transfer.header_function(|buf| {
+            if let Some((tag, value)) = Self::handle_http_header(buf) {
+                let is_etag = tag.eq_ignore_ascii_case(ETAG);
+                let is_lm = tag.eq_ignore_ascii_case(LAST_MODIFIED);
+                if is_etag || is_lm {
+                    // Append a new line to each so we can easily prepend to the index file.
+                    let mut s = String::with_capacity(value.len() + 1);
+                    s.push_str(value);
+                    s.push('\n');
+                    if is_etag {
+                        etag = Some(s);
+                    } else if is_lm {
+                        last_modified = Some(s);
+                    }
+                }
+            }
+
+            true
+        })?;
+
+        transfer
+            .perform()
+            .chain_err(|| format!("failed to fetch index file `{}`", path.display()))?;
+        drop(transfer);
+
+        // Avoid the same conditional headers being sent in future re-uses of the `Easy` client.
+        let mut list = List::new();
+        list.append("If-Modified-Since:")?;
+        list.append("If-None-Match:")?;
+        handle.http_headers(list)?;
+        let response_code = handle.response_code()?;
+        drop(handle);
+
+        debug!("index file downloaded with status code {}", response_code);
+
+        // Make sure we don't double-check the file again if it's loaded again.
+        assert!(
+            self.fresh.insert(path.to_path_buf()),
+            "downloaded the index file `{}` twice",
+            path.display(),
+        );
+
+        match response_code {
+            200 => {}
+            304 => {
+                // Not Modified response.
+                let (_, _, bytes) =
+                    was.expect("conditional request response implies we have local index file");
+                return data(bytes);
+            }
+            403 | 404 | 410 | 451 => {
+                // The crate was deleted from the registry.
+                if was.is_some() {
+                    // Make sure we delete the local index file.
+ debug!("crate {} was deleted from the registry", path.display()); + paths::remove_file(&pkg)?; + } + anyhow::bail!("crate has been deleted from the registry"); + } + code => { + anyhow::bail!( + "load: server returned unexpected HTTP status code {} for {}{}", + code, + self.url, + path.display() + ); + } + } + + paths::create_dir_all(pkg.parent().expect("pkg is a file"))?; + let mut file = paths::create(&root.join(path))?; + file.write_all(etag.as_deref().unwrap_or("\n").as_bytes())?; + file.write_all(last_modified.as_deref().unwrap_or("\n").as_bytes())?; + file.write_all(&contents)?; + file.flush()?; + data(&contents) + } + + fn config(&mut self) -> CargoResult> { + debug!("loading config"); + self.prepare()?; + let path = self + .config + .assert_package_cache_locked(&self.index_path) + .to_path_buf(); + let mut config = None; + self.load(&path, Path::new("config.json"), &mut |json| { + config = Some(serde_json::from_slice(json)?); + Ok(()) + })?; + trace!("config loaded"); + Ok(config) + } + + fn update_index(&mut self) -> CargoResult<()> { + if self.config.offline() { + return Ok(()); + } + if self.config.cli_unstable().no_index_update { + return Ok(()); + } + if self.config.frozen() { + anyhow::bail!("attempting to update a http repository, but --frozen was specified") + } + if !self.config.network_allowed() { + anyhow::bail!("can't update a http repository in offline mode") + } + // Make sure the index is only updated once per session since it is an + // expensive operation. This generally only happens when the resolver + // is run multiple times, such as during `cargo publish`. + if self.config.updated_sources().contains(&self.source_id) { + return Ok(()); + } + + let path = self.config.assert_package_cache_locked(&self.index_path); + self.config + .shell() + .status("Updating", self.source_id.display_index())?; + + // Actually updating the index is more or less a no-op for this implementation. + // All it does is ensure that a subsequent load/prefetch will double-check files with the + // server rather than rely on a locally cached copy of the index files. + + debug!("updating the index"); + self.requested_update = true; + self.fresh.clear(); + self.config.updated_sources().insert(self.source_id); + + // Create a dummy file to record the mtime for when we updated the + // index. + if !path.exists() { + paths::create_dir_all(&path)?; + } + paths::create(&path.join(LAST_UPDATED_FILE))?; + + Ok(()) + } + + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult { + let filename = download::filename(pkg); + let path = self.cache_path.join(&filename); + let path = self.config.assert_package_cache_locked(&path); + download::download(self, &path, pkg, checksum) + } + + fn finish_download( + &mut self, + pkg: PackageId, + checksum: &str, + data: &[u8], + ) -> CargoResult { + download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) + } + + fn is_crate_downloaded(&self, pkg: PackageId) -> bool { + download::is_crate_downloaded(&self.cache_path, &self.config, pkg) + } +} + +impl<'cfg> Downloads<'cfg> { + fn progress(&self, token: usize, total: u64, cur: u64) -> bool { + let dl = &self.pending[&token].0; + dl.total.set(total); + dl.current.set(cur); + if self.tick(WhyTick::DownloadUpdate).is_err() { + return false; + } + + true + } + + fn tick(&self, why: WhyTick) -> CargoResult<()> { + if let WhyTick::DownloadUpdate = why { + // We don't show progress for individual downloads. 
+            return Ok(());
+        }
+
+        let mut progress = self.progress.borrow_mut();
+        let progress = progress.as_mut().unwrap();
+
+        // NOTE: should we show something about self.eager?
+        progress.tick(
+            self.downloads_finished,
+            self.downloads_finished + self.pending.len(),
+        )
+    }
+}
+
+#[derive(Copy, Clone)]
+enum WhyTick {
+    DownloadStarted,
+    DownloadUpdate,
+    DownloadFinished,
+}
+
+impl<'cfg> Drop for Downloads<'cfg> {
+    fn drop(&mut self) {
+        let progress = self.progress.get_mut().take().unwrap();
+        // Don't print a download summary if we're not using a progress bar,
+        // we've already printed lots of `Prefetching...` items.
+        if !progress.is_enabled() {
+            return;
+        }
+        // If we didn't download anything, no need for a summary.
+        if self.downloads_finished == 0 {
+            return;
+        }
+        // If an error happened, let's not clutter up the output.
+        if !self.success {
+            return;
+        }
+        // pick the correct plural of index file(s)
+        let index_files = if self.downloads_finished == 1 {
+            "index file"
+        } else {
+            "index files"
+        };
+        let status = format!(
+            "{} {} ({}) in {}",
+            self.downloads_finished,
+            index_files,
+            ByteSize(self.downloaded_bytes),
+            util::elapsed(self.start.elapsed())
+        );
+        // Clear progress before displaying final summary.
+        drop(progress);
+        drop(self.config.shell().status("Prefetched", status));
+    }
+}
+
+mod tls {
+    use std::cell::Cell;
+
+    use super::Downloads;
+
+    thread_local!(static PTR: Cell<usize> = Cell::new(0));
+
+    pub(crate) fn with<R>(f: impl FnOnce(Option<&Downloads<'_>>) -> R) -> R {
+        let ptr = PTR.with(|p| p.get());
+        if ptr == 0 {
+            f(None)
+        } else {
+            unsafe { f(Some(&*(ptr as *const Downloads<'_>))) }
+        }
+    }
+
+    pub(crate) fn set<R>(dl: &Downloads<'_>, f: impl FnOnce() -> R) -> R {
+        struct Reset<'a, T: Copy>(&'a Cell<T>, T);
+
+        impl<'a, T: Copy> Drop for Reset<'a, T> {
+            fn drop(&mut self) {
+                self.0.set(self.1);
+            }
+        }
+
+        PTR.with(|p| {
+            let _reset = Reset(p, p.get());
+            p.set(dl as *const Downloads<'_> as usize);
+            f()
+        })
+    }
+}
diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs
index f7690f3d652..ef36972218e 100644
--- a/src/cargo/sources/registry/index.rs
+++ b/src/cargo/sources/registry/index.rs
@@ -68,13 +68,14 @@
 use crate::core::dependency::Dependency;
 use crate::core::{PackageId, SourceId, Summary};
-use crate::sources::registry::{RegistryData, RegistryPackage};
+use crate::sources::registry::{make_dep_index_path, RegistryData, RegistryPackage};
 use crate::util::interning::InternedString;
 use crate::util::paths;
 use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver};
 use log::info;
 use semver::{Version, VersionReq};
-use std::collections::{HashMap, HashSet};
+use std::borrow::Cow;
+use std::collections::{hash_map::Entry, HashMap, HashSet};
 use std::fs;
 use std::path::Path;
 use std::str;
@@ -111,6 +112,9 @@ impl<'s> Iterator for UncanonicalizedIter<'s> {
             return None;
         }

+        // TODO:
+        // This implementation can currently generate paths like en/v-/env_logger,
+        // which doesn't _seem_ like a useful candidate to test?
         let ret = Some(
             self.input
                 .chars()
@@ -329,16 +333,10 @@ impl<'cfg> RegistryIndex<'cfg> {
         // See module comment in `registry/mod.rs` for why this is structured
         // the way it is.
- let fs_name = name
- .chars()
- .flat_map(|c| c.to_lowercase())
- .collect::<String>();
- let raw_path = match fs_name.len() {
- 1 => format!("1/{}", fs_name),
- 2 => format!("2/{}", fs_name),
- 3 => format!("3/{}/{}", &fs_name[..1], fs_name),
- _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
- };
+ let raw_path = make_dep_index_path(&name);
+ let raw_path = raw_path
+ .to_str()
+ .expect("path was generated from utf-8 name");
 
 // Attempt to handle misspellings by searching for a chain of related
 // names to the original `raw_path` name. Only return summaries
@@ -367,6 +365,22 @@ impl<'cfg> RegistryIndex<'cfg> {
 Ok(self.summaries_cache.get_mut(&name).unwrap())
 }
 
+ pub fn update_index_file(
+ &mut self,
+ pkg: InternedString,
+ load: &mut dyn RegistryData,
+ ) -> CargoResult<bool> {
+ let path = load.index_path();
+ let root = load.assert_index_locked(path).to_path_buf();
+ let path = make_dep_index_path(&pkg);
+ if load.update_index_file(&root, &path)? {
+ self.summaries_cache.remove(&pkg);
+ Ok(true)
+ } else {
+ Ok(false)
+ }
+ }
+
 pub fn query_inner(
 &mut self,
 dep: &Dependency,
@@ -455,6 +469,178 @@ impl<'cfg> RegistryIndex<'cfg> {
 .any(|summary| summary.yanked);
 Ok(found)
 }
+
+ pub fn prefetch(
+ &mut self,
+ deps: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+ yanked_whitelist: &HashSet<PackageId>,
+ load: &mut dyn RegistryData,
+ ) -> CargoResult<()> {
+ // For some registry backends, it's expensive to fetch each individual index file, and the
+ // process can be sped up significantly by fetching many index files in advance. For
+ // backends where that is the case, we do an approximate walk of all transitive
+ // dependencies and fetch their index files in a pipelined fashion. This means that by the
+ // time the individual loads happen (see load.load in Summary::parse), they should all be
+ // quite fast.
+ //
+ // We have the advantage here of being able to play fast and loose with the exact
+ // dependency requirements. It's fine if we fetch a bit too much, since the incremental
+ // cost of each index file is small.
+ if self.config.offline() || !load.start_prefetch()? {
+ // Backend does not support prefetching.
+ return Ok(());
+ }
+
+ load.prepare()?;
+
+ let root = load.assert_index_locked(&self.path);
+ let cache_root = root.join(".cache");
+ let index_version = load.current_version();
+
+ log::debug!("prefetching transitive dependencies");
+
+ // Since we allow dependency cycles in crates, we may end up walking in circles forever if
+ // we just iteratively handled each candidate as we discovered it. The real resolver is
+ // smart about how it avoids walking endlessly in cycles, but in this simple greedy
+ // resolver we play fast and loose, and instead just keep track of dependencies we have
+ // already looked at and don't walk them again.
+ let mut walked = HashSet::new();
+
+ // Seed the prefetching with everything from the lockfile.
+ //
+ // This allows us to start downloads of a tonne of index files we otherwise would not
+ // discover until much later, which saves us many RTTs. On a dependency graph like that of
+ // cargo itself, it cut my download time to 1/5th.
+ //
+ // Note that the greedy fetch below actually ends up fetching additional dependencies even
+ // if nothing has changed in the dependency graph. This is because the lockfile contains
+ // only the dependencies we actually _used_ last time. Thus, any dependencies that the
+ // greedy algorithm (erroneously) thinks we need will still need to be queued for download.
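+ // The yanked whitelist is seeded from the packages in the lockfile, which is why
+ // iterating over it here amounts to walking the previously locked dependencies.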
+ for pkg in yanked_whitelist {
+ if pkg.source_id() == self.source_id {
+ let name = pkg.name();
+ log::trace!("prefetching from lockfile: {}", name);
+ load.prefetch(root, &make_dep_index_path(&*name), name, None, true)?;
+ }
+ }
+
+ // Also seed the prefetching with the root dependencies.
+ //
+ // It's important that we do this _before_ we handle any responses to downloads,
+ // since all the prefetches from above are marked as being transitive. We need to mark
+ // direct dependencies as such before we start iterating, otherwise we will erroneously
+ // ignore their dev-dependencies when they're yielded by next_prefetched.
+ for dep in deps {
+ walked.insert((dep.package_name(), dep.version_req().clone()));
+ log::trace!(
+ "prefetching from direct dependencies: {}",
+ dep.package_name()
+ );
+
+ // NOTE: We do not use UncanonicalizedIter here or below because if the user gave a
+ // misspelling, it's fine if we don't prefetch their misspelling. The resolver will be
+ // a bit slower, but then give them an error.
+ load.prefetch(
+ root,
+ &make_dep_index_path(&*dep.package_name()),
+ dep.package_name(),
+ Some(dep.version_req()),
+ false,
+ )?;
+ }
+
+ // Now, continuously iterate by walking dependencies we've loaded and fetching the index
+ // entry for _their_ dependencies.
+ while let Some(fetched) = load.next_prefetched()? {
+ log::trace!("got prefetched {}", fetched.name);
+ let summaries = match self.summaries_cache.entry(fetched.name()) {
+ Entry::Occupied(o) => o.into_mut(),
+ Entry::Vacant(v) => {
+ let summaries = Summaries::parse(
+ index_version.as_deref(),
+ root,
+ &cache_root,
+ fetched.path(),
+ self.source_id,
+ load,
+ self.config,
+ )?;
+
+ let summaries = if let Some(s) = summaries { s } else { continue };
+ v.insert(summaries)
+ }
+ };
+
+ for (version, maybe_summary) in &mut summaries.versions {
+ log::trace!("consider prefetching version {}", version);
+ if !fetched.version_reqs().any(|vr| vr.matches(version)) {
+ // The crate that pulled in this crate as a dependency did not care about this
+ // particular version, so we don't need to walk its dependencies.
+ //
+ // We _could_ simply walk every transitive dependency, and it probably wouldn't
+ // be _that_ bad. But over time it'd mean that a bunch of index files are
+ // pulled down even though they're no longer used anywhere in the dependency
+ // closure. This, again, probably doesn't matter, and it would make the logic
+ // here _much_ simpler, but for now we try to do better.
+ //
+ // Note that another crate in the dependency closure might still pull in this
+ // version because that crate has a different set of requirements.
+ continue;
+ }
+
+ let summary =
+ maybe_summary.parse(self.config, &summaries.raw_data, self.source_id)?;
+
+ if summary.yanked {
+ // This version has been yanked, so let's not even go there.
+ continue;
+ }
+
+ for dep in summary.summary.dependencies() {
+ if dep.source_id() != self.source_id {
+ // This dependency lives in a different source, so we won't be prefetching
+ // anything from there anyway.
+ //
+ // It is _technically_ possible that a dependency in a different source
+ // then pulls in a dependency from _this_ source again, but we'll let that
+ // go to the slow path.
+ continue;
+ }
+
+ // Don't pull in dev-dependencies of transitive dependencies.
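+ // (`dep.is_transitive()` is false exactly for dev-dependencies, which only
+ // matter for crates that are being built directly.)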
+ if fetched.is_transitive && !dep.is_transitive() {
+ log::trace!(
+ "not prefetching transitive dev-dependency {}",
+ dep.package_name()
+ );
+ continue;
+ }
+
+ if !walked.insert((dep.package_name(), dep.version_req().clone())) {
+ // We've already walked this dependency -- no need to do so again.
+ continue;
+ }
+
+ log::trace!("prefetching transitive dependency {}", dep.package_name());
+ load.prefetch(
+ root,
+ &make_dep_index_path(&*dep.package_name()),
+ dep.package_name(),
+ Some(dep.version_req()),
+ true,
+ )?;
+ }
+ }
+ }
+
+ Ok(())
+ }
 }
 
 impl Summaries {
diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs
index d35345eb86c..73730973dc3 100644
--- a/src/cargo/sources/registry/local.rs
+++ b/src/cargo/sources/registry/local.rs
@@ -47,7 +47,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
 }
 
 fn load(
- &self,
+ &mut self,
 root: &Path,
 path: &Path,
 data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs
index 159b0952900..f168eccca5c 100644
--- a/src/cargo/sources/registry/mod.rs
+++ b/src/cargo/sources/registry/mod.rs
@@ -369,11 +369,77 @@ impl<'a> RegistryDependency<'a> {
 }
 }
 
+/// An indicator that the prefetching for a given package has completed.
+///
+/// To retrieve the index data for the package, use `Summaries::parse`.
+pub struct Fetched {
+ name: InternedString,
+ path: PathBuf,
+ // NOTE: we can get rid of the HashSet (and other complexity) if we had VersionReq::union
+ reqs: HashSet<semver::VersionReq>,
+ is_transitive: bool,
+}
+
+impl Fetched {
+ pub fn name(&self) -> InternedString {
+ self.name
+ }
+
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+
+ pub fn version_reqs(&self) -> impl Iterator<Item = &semver::VersionReq> {
+ self.reqs.iter()
+ }
+}
+
 pub trait RegistryData {
 fn prepare(&self) -> CargoResult<()>;
 fn index_path(&self) -> &Filesystem;
+
+ /// Initiate a prefetch phase.
+ ///
+ /// During prefetch, a greedy dependency solver will walk the transitive dependency closure of
+ /// the package being built and call `prefetch` on each dependency. This allows an
+ /// implementation to pipeline the download of information for those dependencies, rather than
+ /// relying on synchronous calls to `load` later on.
+ ///
+ /// If this method returns `false` (the default), no prefetching happens.
+ fn start_prefetch(&mut self) -> CargoResult<bool> {
+ Ok(false)
+ }
+
+ /// Enqueue a prefetch of the given package.
+ ///
+ /// The package path, name, and dependency version requirements are passed back from
+ /// `next_prefetched` so that they can be used to inform future calls to `prefetch`.
+ ///
+ /// If `req` is `None`, the index file will be downloaded, but will not be yielded by
+ /// `next_prefetched`. This is useful if you already have the transitive closure of index
+ /// entries you wish to fetch.
+ fn prefetch(
+ &mut self,
+ _root: &Path,
+ _path: &Path,
+ _name: InternedString,
+ _req: Option<&semver::VersionReq>,
+ _is_transitive: bool,
+ ) -> CargoResult<()> {
+ Ok(())
+ }
+
+ /// Dequeue the next available prefetched index file.
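+ ///
+ /// Returns `Ok(None)` (the default) once all enqueued prefetches have been handled,
+ /// or if the backend does not support prefetching.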
+ fn next_prefetched(&mut self) -> CargoResult<Option<Fetched>> {
+ Ok(None)
+ }
+
+ /// Refresh the locally cached copy of the given index file, if the backend supports
+ /// per-file index updates.
+ ///
+ /// Returns `false` (the default) if it does not, in which case the caller should fall
+ /// back to a full index update.
+ fn update_index_file(&mut self, _root: &Path, _path: &Path) -> CargoResult<bool> {
+ Ok(false)
+ }
+
 fn load(
- &self,
+ &mut self,
 root: &Path,
 path: &Path,
 data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
@@ -396,6 +462,8 @@ pub enum MaybeLock {
 Download { url: String, descriptor: String },
 }
 
+mod download;
+mod http_remote;
 mod index;
 mod local;
 mod remote;
@@ -411,10 +479,24 @@ impl<'cfg> RegistrySource<'cfg> {
 source_id: SourceId,
 yanked_whitelist: &HashSet<PackageId>,
 config: &'cfg Config,
- ) -> RegistrySource<'cfg> {
+ ) -> CargoResult<RegistrySource<'cfg>> {
 let name = short_name(source_id);
- let ops = remote::RemoteRegistry::new(source_id, config, &name);
- RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist)
+ let ops = if source_id.url().scheme().starts_with("sparse+") {
+ if !config.cli_unstable().http_registry {
+ anyhow::bail!("Usage of HTTP-based registries requires `-Z http-registry`");
+ }
+
+ Box::new(http_remote::HttpRegistry::new(source_id, config, &name)) as Box<_>
+ } else {
+ Box::new(remote::RemoteRegistry::new(source_id, config, &name)) as Box<_>
+ };
+ Ok(RegistrySource::new(
+ source_id,
+ config,
+ &name,
+ ops,
+ yanked_whitelist,
+ ))
 }
 
 pub fn local(
@@ -526,6 +608,11 @@ impl<'cfg> RegistrySource<'cfg> {
 }
 
 fn do_update(&mut self) -> CargoResult<()> {
+ // NOTE: It is really bad if this method is called after prefetching has completed.
+ // It will cause every subsequent `load` to double-check with the server again
+ // _synchronously_. If this is ever called, we should arguably re-run prefetching, or the
+ // following build will be quite slow. Consider using update_index_file instead.
+
 self.ops.update_index()?;
 let path = self.ops.index_path();
 self.index = index::RegistryIndex::new(self.source_id, path, self.config);
@@ -564,6 +651,21 @@ impl<'cfg> RegistrySource<'cfg> {
 }
 
 impl<'cfg> Source for RegistrySource<'cfg> {
+ fn prefetch(
+ &mut self,
+ deps: &mut dyn ExactSizeIterator<Item = Cow<'_, Dependency>>,
+ ) -> CargoResult<()> {
+ // In `query`, if a dependency is locked, we see if we can get away with querying it
+ // without doing an index update. Only if that fails do we update the index and then try
+ // again. Since we're in the prefetching stage here, we never want to update the index,
+ // whether a given dependency is locked or not. Instead, we just prefetch all the current
+ // dependencies as-is. If an index update is needed later, we'll deal with it at that time.
+ self.index
+ .prefetch(deps, &self.yanked_whitelist, &mut *self.ops)?;
+ Ok(())
+ }
+
 fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> {
 // If this is a precise dependency, then it came from a lock file and in
 // theory the registry is known to contain this version. If, however, we
@@ -582,8 +684,25 @@ impl<'cfg> Source for RegistrySource<'cfg> {
 if called {
 return Ok(());
 } else {
- debug!("falling back to an update");
- self.do_update()?;
+ // We failed to query the dependency based on the currently available index files.
+ // This probably means that our index file for `dep` is outdated, and does not
+ // contain the requested version.
+ //
+ // If the registry we are using supports per-file index updates, we tell it to
+ // update just the given index file and then try the query again. Otherwise, we
+ // fall back to a full index update.
+ if self
+ .index
+ .update_index_file(dep.package_name(), &mut *self.ops)?
+ {
+ debug!(
+ "selectively refreshed index file for {}",
+ dep.package_name()
+ );
+ } else {
+ debug!("falling back to an update");
+ self.do_update()?;
+ }
 }
 }
 
@@ -658,8 +777,63 @@ impl<'cfg> Source for RegistrySource<'cfg> {
 
 fn is_yanked(&mut self, pkg: PackageId) -> CargoResult<bool> {
 if !self.updated {
- self.do_update()?;
+ // Try selectively updating just the index file for this package if possible.
+ if !self.index.update_index_file(pkg.name(), &mut *self.ops)? {
+ // It's not, so update the whole index.
+ self.do_update()?;
+ }
 }
 self.index.is_yanked(pkg, &mut *self.ops)
 }
 }
+
+fn make_dep_index_path(name: &str) -> PathBuf {
+ let fs_name = name
+ .chars()
+ .flat_map(|c| c.to_lowercase())
+ .collect::<String>();
+ let raw_path = match fs_name.len() {
+ 1 => format!("1/{}", fs_name),
+ 2 => format!("2/{}", fs_name),
+ 3 => format!("3/{}/{}", &fs_name[..1], fs_name),
+ _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
+ };
+ PathBuf::from(raw_path)
+}
+
+fn make_dep_prefix(name: &str) -> String {
+ match name.len() {
+ 1 => String::from("1"),
+ 2 => String::from("2"),
+ 3 => format!("3/{}", &name[..1]),
+ _ => format!("{}/{}", &name[0..2], &name[2..4]),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{make_dep_index_path, make_dep_prefix};
+
+ #[test]
+ fn dep_path() {
+ use std::path::Path;
+ assert_eq!(make_dep_index_path("a"), Path::new("1/a"));
+ assert_eq!(make_dep_index_path("A"), Path::new("1/a"));
+ assert_eq!(make_dep_index_path("ab"), Path::new("2/ab"));
+ assert_eq!(make_dep_index_path("Ab"), Path::new("2/ab"));
+ assert_eq!(make_dep_index_path("abc"), Path::new("3/a/abc"));
+ assert_eq!(make_dep_index_path("Abc"), Path::new("3/a/abc"));
+ assert_eq!(make_dep_index_path("AbCd"), Path::new("ab/cd/abcd"));
+ assert_eq!(make_dep_index_path("aBcDe"), Path::new("ab/cd/abcde"));
+ }
+
+ #[test]
+ fn dep_prefix() {
+ assert_eq!(make_dep_prefix("a"), "1");
+ assert_eq!(make_dep_prefix("ab"), "2");
+ assert_eq!(make_dep_prefix("abc"), "3/a");
+ assert_eq!(make_dep_prefix("Abc"), "3/A");
+ assert_eq!(make_dep_prefix("AbCd"), "Ab/Cd");
+ assert_eq!(make_dep_prefix("aBcDe"), "aB/cD");
+ }
+}
diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs
index 2e44d9ae3ea..9fcd469d4e5 100644
--- a/src/cargo/sources/registry/remote.rs
+++ b/src/cargo/sources/registry/remote.rs
@@ -1,34 +1,20 @@
 use crate::core::{GitReference, PackageId, SourceId};
 use crate::sources::git;
+use crate::sources::registry::download;
 use crate::sources::registry::MaybeLock;
-use crate::sources::registry::{
- RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE,
- VERSION_TEMPLATE,
-};
+use crate::sources::registry::{RegistryConfig, RegistryData};
 use crate::util::errors::{CargoResult, CargoResultExt};
 use crate::util::interning::InternedString;
 use crate::util::paths;
-use crate::util::{Config, Filesystem, Sha256};
+use crate::util::{Config, Filesystem};
 use lazycell::LazyCell;
 use log::{debug, trace};
 use std::cell::{Cell, Ref, RefCell};
-use std::fmt::Write as FmtWrite;
-use std::fs::{self, File, OpenOptions};
-use std::io::prelude::*;
-use std::io::SeekFrom;
+use std::fs::File;
 use std::mem;
 use std::path::Path;
 use std::str;
 
-fn make_dep_prefix(name: &str) -> String {
- match name.len() {
- 1 => String::from("1"),
- 2 => String::from("2"),
- 3 => format!("3/{}", &name[..1]),
- _ => format!("{}/{}", &name[0..2], &name[2..4]),
- }
-}
-
 pub struct RemoteRegistry<'cfg> {
 index_path: Filesystem,
 cache_path: Filesystem,
@@ -135,10 +121,6 @@
impl<'cfg> RemoteRegistry<'cfg> {
 *self.tree.borrow_mut() = Some(tree);
 Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap()))
 }
-
- fn filename(&self, pkg: PackageId) -> String {
- format!("{}-{}.crate", pkg.name(), pkg.version())
- }
 }
 
 const LAST_UPDATED_FILE: &str = ".last-updated";
@@ -167,7 +149,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
 }
 
 fn load(
- &self,
+ &mut self,
 _root: &Path,
 path: &Path,
 data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
@@ -247,44 +229,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
 Ok(())
 }
 
- fn download(&mut self, pkg: PackageId, _checksum: &str) -> CargoResult<MaybeLock> {
- let filename = self.filename(pkg);
-
- // Attempt to open a read-only copy first to avoid an exclusive write
- // lock and also work with read-only filesystems. Note that we check the
- // length of the file like below to handle interrupted downloads.
- //
- // If this fails then we fall through to the exclusive path where we may
- // have to redownload the file.
+ fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> {
+ let filename = download::filename(pkg);
 let path = self.cache_path.join(&filename);
 let path = self.config.assert_package_cache_locked(&path);
- if let Ok(dst) = File::open(&path) {
- let meta = dst.metadata()?;
- if meta.len() > 0 {
- return Ok(MaybeLock::Ready(dst));
- }
- }
-
- let config = self.config()?.unwrap();
- let mut url = config.dl;
- if !url.contains(CRATE_TEMPLATE)
- && !url.contains(VERSION_TEMPLATE)
- && !url.contains(PREFIX_TEMPLATE)
- && !url.contains(LOWER_PREFIX_TEMPLATE)
- {
- write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap();
- }
- let prefix = make_dep_prefix(&*pkg.name());
- let url = url
- .replace(CRATE_TEMPLATE, &*pkg.name())
- .replace(VERSION_TEMPLATE, &pkg.version().to_string())
- .replace(PREFIX_TEMPLATE, &prefix)
- .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase());
-
- Ok(MaybeLock::Download {
- url,
- descriptor: pkg.to_string(),
- })
+ download::download(self, &path, pkg, checksum)
 }
 
 fn finish_download(
@@ -293,42 +242,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
 checksum: &str,
 data: &[u8],
 ) -> CargoResult<File> {
- // Verify what we just downloaded
- let actual = Sha256::new().update(data).finish_hex();
- if actual != checksum {
- anyhow::bail!("failed to verify the checksum of `{}`", pkg)
- }
-
- let filename = self.filename(pkg);
- self.cache_path.create_dir()?;
- let path = self.cache_path.join(&filename);
- let path = self.config.assert_package_cache_locked(&path);
- let mut dst = OpenOptions::new()
- .create(true)
- .read(true)
- .write(true)
- .open(&path)
- .chain_err(|| format!("failed to open `{}`", path.display()))?;
- let meta = dst.metadata()?;
- if meta.len() > 0 {
- return Ok(dst);
- }
-
- dst.write_all(data)?;
- dst.seek(SeekFrom::Start(0))?;
- Ok(dst)
+ download::finish_download(&self.cache_path, &self.config, pkg, checksum, data)
 }
 
 fn is_crate_downloaded(&self, pkg: PackageId) -> bool {
- let filename = format!("{}-{}.crate", pkg.name(), pkg.version());
- let path = Path::new(&filename);
-
- let path = self.cache_path.join(path);
- let path = self.config.assert_package_cache_locked(&path);
- if let Ok(meta) = fs::metadata(path) {
- return meta.len() > 0;
- }
- false
+ download::is_crate_downloaded(&self.cache_path, &self.config, pkg)
 }
 }
 
@@ -338,18 +256,3 @@ impl<'cfg> Drop for RemoteRegistry<'cfg> {
 self.tree.borrow_mut().take();
 }
 }
-
-#[cfg(test)]
-mod tests {
- use super::make_dep_prefix;
-
- #[test]
- fn dep_prefix() {
assert_eq!(make_dep_prefix("a"), "1"); - assert_eq!(make_dep_prefix("ab"), "2"); - assert_eq!(make_dep_prefix("abc"), "3/a"); - assert_eq!(make_dep_prefix("Abc"), "3/A"); - assert_eq!(make_dep_prefix("AbCd"), "Ab/Cd"); - assert_eq!(make_dep_prefix("aBcDe"), "aB/cD"); - } -} diff --git a/src/cargo/sources/replaced.rs b/src/cargo/sources/replaced.rs index 7f4a622fd84..efe2d5d3a11 100644 --- a/src/cargo/sources/replaced.rs +++ b/src/cargo/sources/replaced.rs @@ -1,6 +1,7 @@ use crate::core::source::MaybePackage; use crate::core::{Dependency, Package, PackageId, Source, SourceId, Summary}; use crate::util::errors::{CargoResult, CargoResultExt}; +use std::borrow::Cow; pub struct ReplacedSource<'cfg> { to_replace: SourceId, @@ -39,6 +40,25 @@ impl<'cfg> Source for ReplacedSource<'cfg> { self.inner.requires_precise() } + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { + let (replace_with, to_replace) = (self.replace_with, self.to_replace); + self.inner + .prefetch( + &mut deps + .map(|dep| Cow::Owned(dep.into_owned().map_source(to_replace, replace_with))), + ) + .chain_err(|| { + format!( + "failed to prefetch from replaced source {}", + self.to_replace + ) + })?; + Ok(()) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { let (replace_with, to_replace) = (self.replace_with, self.to_replace); let dep = dep.clone().map_source(to_replace, replace_with); diff --git a/src/cargo/util/canonical_url.rs b/src/cargo/util/canonical_url.rs index c6f30527932..01b8df7e871 100644 --- a/src/cargo/util/canonical_url.rs +++ b/src/cargo/util/canonical_url.rs @@ -1,4 +1,4 @@ -use crate::util::errors::CargoResult; +use crate::util::{errors::CargoResult, IntoUrl}; use std::hash::{self, Hash}; use url::Url; @@ -56,6 +56,17 @@ impl CanonicalUrl { url.path_segments_mut().unwrap().pop().push(&last); } + // Ignore the protocol specifier (if any). + if url.scheme().starts_with("sparse+") { + // NOTE: it is illegal to use set_scheme to change sparse+http(s) to http(s). 
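+ // The `url` crate forbids set_scheme between "special" schemes like http(s)
+ // and non-special ones like sparse+http(s), so we reparse the string instead.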
+ url = url + .to_string() + .strip_prefix("sparse+") + .expect("we just found that prefix") + .into_url() + .expect("a valid url without a protocol specifier should still be valid"); + } + Ok(CanonicalUrl(url)) } diff --git a/tests/testsuite/alt_registry.rs b/tests/testsuite/alt_registry.rs index 748d5dcaaa5..cd9038a8967 100644 --- a/tests/testsuite/alt_registry.rs +++ b/tests/testsuite/alt_registry.rs @@ -795,7 +795,7 @@ fn alt_reg_metadata() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" }, { "name": "iodep", @@ -948,7 +948,7 @@ fn alt_reg_metadata() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" }, { "name": "iodep", @@ -997,7 +997,7 @@ fn alt_reg_metadata() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" } ], "targets": "{...}", @@ -1092,7 +1092,7 @@ fn unknown_registry() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" } ], "targets": "{...}", diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs new file mode 100644 index 00000000000..e639b7a69dd --- /dev/null +++ b/tests/testsuite/http_registry.rs @@ -0,0 +1,954 @@ +//! Tests for HTTP registry sources. + +// Many of these tests are copied from registry.rs. +// It'd be nice if we could share them instead. +// Also, there are many tests in registry.rs that aren't specific to registry. +// It'd be nice if those were in their own module. 
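+// These tests exercise `sparse+http` sources against the minimal static-file
+// HTTP server provided by cargo-test-support's `serve_registry`.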
+ +use cargo_test_support::paths::{self, CargoPathExt}; +use cargo_test_support::registry::{ + registry_path, serve_registry, Dependency, Package, RegistryServer, +}; +use cargo_test_support::t; +use cargo_test_support::{basic_manifest, project}; +use std::fs; +use std::path::Path; + +fn cargo(p: &cargo_test_support::Project, s: &str) -> cargo_test_support::Execs { + let mut e = p.cargo(s); + e.arg("-Zhttp-registry").masquerade_as_nightly_cargo(); + e +} + +fn setup() -> RegistryServer { + let server = serve_registry(registry_path()); + + let root = paths::root(); + t!(fs::create_dir(&root.join(".cargo"))); + t!(fs::write( + root.join(".cargo/config"), + format!( + " + [source.crates-io] + registry = 'https://wut' + replace-with = 'my-awesome-http-registry' + + [source.my-awesome-http-registry] + registry = 'sparse+http://{}' + ", + server.addr() + ) + )); + + server +} + +#[cargo_test] +fn not_on_stable() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + bar = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("bar", "0.0.1").publish(); + + p.cargo("build") + .with_status(101) + .with_stderr(&format!( + "\ +error: failed to get `bar` as a dependency of package `foo v0.0.1 ([..])` + +Caused by: + failed to load source for dependency `bar` + +Caused by: + Unable to update registry `https://github.com/rust-lang/crates.io-index` + +Caused by: + Usage of HTTP-based registries requires `-Z http-registry` +" + )) + .run(); +} + +#[cargo_test] +fn simple() { + let server = setup(); + let url = format!("sparse+http://{}", server.addr()); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + bar = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("bar", "0.0.1").publish(); + + cargo(&p, "build") + .with_stderr(&format!( + "\ +[UPDATING] `{reg}` index +[PREFETCHING] index files ... +[DOWNLOADING] crates ... +[DOWNLOADED] bar v0.0.1 (registry `{reg}`) +[COMPILING] bar v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); + + cargo(&p, "clean").run(); + + // Don't download a second time + cargo(&p, "build") + .with_stderr( + "\ +[PREFETCHING] index files ... +[COMPILING] bar v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + ) + .run(); +} + +#[cargo_test] +fn deps() { + let server = setup(); + let url = format!("sparse+http://{}", server.addr()); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + bar = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("baz", "0.0.1").publish(); + Package::new("bar", "0.0.1").dep("baz", "*").publish(); + + cargo(&p, "build") + .with_stderr(&format!( + "\ +[UPDATING] `{reg}` index +[PREFETCHING] index files ... +[DOWNLOADING] crates ... +[DOWNLOADED] [..] v0.0.1 (registry `{reg}`) +[DOWNLOADED] [..] 
v0.0.1 (registry `{reg}`) +[COMPILING] baz v0.0.1 +[COMPILING] bar v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); +} + +#[cargo_test] +fn nonexistent() { + let _server = setup(); + Package::new("init", "0.0.1").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + nonexistent = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + cargo(&p, "build") + .with_status(101) + .with_stderr( + "\ +[UPDATING] [..] index +[PREFETCHING] index files ... +error: no matching package named `nonexistent` found +location searched: registry [..] +required by package `foo v0.0.1 ([..])` +", + ) + .run(); +} + +#[cargo_test] +fn update_registry() { + let server = setup(); + let url = format!("sparse+http://{}", server.addr()); + Package::new("init", "0.0.1").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + notyet = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + cargo(&p, "build") + .with_status(101) + .with_stderr_contains( + "\ +error: no matching package named `notyet` found +location searched: registry `[..]` +required by package `foo v0.0.1 ([..])` +", + ) + .run(); + + Package::new("notyet", "0.0.1").publish(); + + cargo(&p, "build") + .with_stderr(format!( + "\ +[UPDATING] `{reg}` index +[PREFETCHING] index files ... +[DOWNLOADING] crates ... +[DOWNLOADED] notyet v0.0.1 (registry `{reg}`) +[COMPILING] notyet v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); +} + +#[cargo_test] +fn update_publish_then_update() { + let server = setup(); + let url = format!("sparse+http://{}", server.addr()); + + // First generate a Cargo.lock and a clone of the registry index at the + // "head" of the current registry. + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + Package::new("a", "0.1.0").publish(); + cargo(&p, "build").run(); + + // Next, publish a new package and back up the copy of the registry we just + // created. + Package::new("a", "0.1.1").publish(); + let registry = paths::home().join(".cargo/registry"); + let backup = paths::root().join("registry-backup"); + t!(fs::rename(®istry, &backup)); + + // Generate a Cargo.lock with the newer version, and then move the old copy + // of the registry back into place. + let p2 = project() + .at("foo2") + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + cargo(&p2, "build").run(); + registry.rm_rf(); + t!(fs::rename(&backup, ®istry)); + t!(fs::rename( + p2.root().join("Cargo.lock"), + p.root().join("Cargo.lock") + )); + + // Finally, build the first project again (with our newer Cargo.lock) which + // should download the new index file from the registry, download the new crate, and + // then build everything again. + cargo(&p, "build") + .with_stderr(format!( + "\ +[PREFETCHING] index files ... +[DOWNLOADING] crates ... 
+[DOWNLOADED] a v0.1.1 (registry `{reg}`) +[COMPILING] a v0.1.1 +[COMPILING] foo v0.5.0 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); +} + +#[cargo_test] +fn update_multiple_packages() { + let server = setup(); + let url = format!("sparse+http://{}", server.addr()); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "*" + b = "*" + c = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("a", "0.1.0").publish(); + Package::new("b", "0.1.0").publish(); + Package::new("c", "0.1.0").publish(); + + cargo(&p, "fetch").run(); + + Package::new("a", "0.1.1").publish(); + Package::new("b", "0.1.1").publish(); + Package::new("c", "0.1.1").publish(); + + cargo(&p, "update -pa -pb") + .with_stderr( + "\ +[UPDATING] `[..]` index +[PREFETCHING] index files ... +[UPDATING] a v0.1.0 -> v0.1.1 +[UPDATING] b v0.1.0 -> v0.1.1 +", + ) + .run(); + + cargo(&p, "update -pb -pc") + .with_stderr( + "\ +[UPDATING] `[..]` index +[PREFETCHING] index files ... +[UPDATING] c v0.1.0 -> v0.1.1 +", + ) + .run(); + + cargo(&p, "build") + .with_stderr_contains(format!("[DOWNLOADED] a v0.1.1 (registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] b v0.1.1 (registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] c v0.1.1 (registry `{}`)", url)) + .with_stderr_contains("[COMPILING] a v0.1.1") + .with_stderr_contains("[COMPILING] b v0.1.1") + .with_stderr_contains("[COMPILING] c v0.1.1") + .with_stderr_contains("[COMPILING] foo v0.5.0 ([..])") + .run(); +} + +#[cargo_test] +fn bundled_crate_in_registry() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + bar = "0.1" + baz = "0.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("bar", "0.1.0").publish(); + Package::new("baz", "0.1.0") + .dep("bar", "0.1.0") + .file( + "Cargo.toml", + r#" + [package] + name = "baz" + version = "0.1.0" + authors = [] + + [dependencies] + bar = { path = "bar", version = "0.1.0" } + "#, + ) + .file("src/lib.rs", "") + .file("bar/Cargo.toml", &basic_manifest("bar", "0.1.0")) + .file("bar/src/lib.rs", "") + .publish(); + + cargo(&p, "run").run(); +} + +#[cargo_test] +fn update_same_prefix_oh_my_how_was_this_a_bug() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "ugh" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "0.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foobar", "0.2.0").publish(); + Package::new("foo", "0.1.0") + .dep("foobar", "0.2.0") + .publish(); + + cargo(&p, "generate-lockfile").run(); + cargo(&p, "update -pfoobar --precise=0.2.0").run(); +} + +#[cargo_test] +fn use_semver() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "1.2.3-alpha.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foo", "1.2.3-alpha.0").publish(); + + cargo(&p, "build").run(); +} + +#[cargo_test] +fn use_semver_package_incorrectly() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [workspace] + members = ["a", "b"] + "#, + ) + .file( + "a/Cargo.toml", + r#" + [project] + name = "a" + version = "0.1.1-alpha.0" + authors = [] + "#, + ) + .file( + 
"b/Cargo.toml", + r#" + [project] + name = "b" + version = "0.1.0" + authors = [] + + [dependencies] + a = { version = "^0.1", path = "../a" } + "#, + ) + .file("a/src/main.rs", "fn main() {}") + .file("b/src/main.rs", "fn main() {}") + .build(); + + cargo(&p, "build") + .with_status(101) + .with_stderr( + "\ +error: no matching package named `a` found +location searched: [..] +prerelease package needs to be specified explicitly +a = { version = \"0.1.1-alpha.0\" } +required by package `b v0.1.0 ([..])` +", + ) + .run(); +} + +#[cargo_test] +fn only_download_relevant() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [target.foo.dependencies] + foo = "*" + [dev-dependencies] + bar = "*" + [dependencies] + baz = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foo", "0.1.0").publish(); + Package::new("bar", "0.1.0").publish(); + Package::new("baz", "0.1.0").publish(); + + cargo(&p, "build") + .with_stderr( + "\ +[UPDATING] `[..]` index +[PREFETCHING] index files ... +[DOWNLOADING] crates ... +[DOWNLOADED] baz v0.1.0 ([..]) +[COMPILING] baz v0.1.0 +[COMPILING] bar v0.5.0 ([..]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + ) + .run(); +} + +#[cargo_test] +fn resolve_and_backtracking() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foo", "0.1.1") + .feature_dep("bar", "0.1", &["a", "b"]) + .publish(); + Package::new("foo", "0.1.0").publish(); + + cargo(&p, "build").run(); +} + +#[cargo_test] +fn disallow_network() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + // TODO: this should also check that we don't access the network for things we have in cache. + cargo(&p, "build --frozen") + .with_status(101) + .with_stderr( + "\ +[ERROR] failed to prefetch dependencies + +Caused by: + failed to load source for dependency `foo` + +Caused by: + Unable to update registry [..] + +Caused by: + failed to update replaced source registry `https://github.com/rust-lang/crates.io-index` + +Caused by: + attempting to update a http repository, but --frozen was specified +", + ) + .run(); +} + +#[cargo_test] +fn add_dep_dont_update_registry() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + "#, + ) + .file("src/main.rs", "fn main() {}") + .file( + "baz/Cargo.toml", + r#" + [project] + name = "baz" + version = "0.5.0" + authors = [] + + [dependencies] + remote = "0.3" + "#, + ) + .file("baz/src/lib.rs", "") + .build(); + + Package::new("remote", "0.3.4").publish(); + + cargo(&p, "build").run(); + + p.change_file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + remote = "0.3" + "#, + ); + + cargo(&p, "build") + .with_stderr( + "\ +[PREFETCHING] index files ... +[COMPILING] bar v0.5.0 ([..]) +[FINISHED] [..] 
+", + ) + .run(); +} + +#[cargo_test] +fn bump_version_dont_update_registry() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + "#, + ) + .file("src/main.rs", "fn main() {}") + .file( + "baz/Cargo.toml", + r#" + [project] + name = "baz" + version = "0.5.0" + authors = [] + + [dependencies] + remote = "0.3" + "#, + ) + .file("baz/src/lib.rs", "") + .build(); + + Package::new("remote", "0.3.4").publish(); + + cargo(&p, "build").run(); + + p.change_file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.6.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + "#, + ); + + cargo(&p, "build") + .with_stderr( + "\ +[COMPILING] bar v0.6.0 ([..]) +[FINISHED] [..] +", + ) + .run(); +} + +#[cargo_test] +fn toml_lies_but_index_is_truth() { + let _server = setup(); + Package::new("foo", "0.2.0").publish(); + Package::new("bar", "0.3.0") + .dep("foo", "0.2.0") + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.3.0" + authors = [] + + [dependencies] + foo = "0.1.0" + "#, + ) + .file("src/lib.rs", "extern crate foo;") + .publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + bar = "0.3" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + cargo(&p, "build -v").run(); +} + +#[cargo_test] +fn rename_deps_and_features() { + let _server = setup(); + Package::new("foo", "0.1.0") + .file("src/lib.rs", "pub fn f1() {}") + .publish(); + Package::new("foo", "0.2.0") + .file("src/lib.rs", "pub fn f2() {}") + .publish(); + Package::new("bar", "0.2.0") + .add_dep( + Dependency::new("foo01", "0.1.0") + .package("foo") + .optional(true), + ) + .add_dep(Dependency::new("foo02", "0.2.0").package("foo")) + .feature("another", &["foo01"]) + .file( + "src/lib.rs", + r#" + extern crate foo02; + #[cfg(feature = "foo01")] + extern crate foo01; + + pub fn foo() { + foo02::f2(); + #[cfg(feature = "foo01")] + foo01::f1(); + } + "#, + ) + .publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + bar = "0.2" + "#, + ) + .file( + "src/main.rs", + " + extern crate bar; + fn main() { bar::foo(); } + ", + ) + .build(); + + cargo(&p, "build").run(); + cargo(&p, "build --features bar/foo01").run(); + cargo(&p, "build --features bar/another").run(); +} + +#[cargo_test] +fn ignore_invalid_json_lines() { + let _server = setup(); + Package::new("foo", "0.1.0").publish(); + Package::new("foo", "0.1.1").invalid_json(true).publish(); + Package::new("foo", "0.2.0").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + foo = '0.1.0' + foo02 = { version = '0.2.0', package = 'foo' } + "#, + ) + .file("src/lib.rs", "") + .build(); + + cargo(&p, "build").run(); +} + +#[cargo_test] +fn readonly_registry_still_works() { + let _server = setup(); + Package::new("foo", "0.1.0").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + foo = '0.1.0' + "#, + ) + .file("src/lib.rs", "") + .build(); + + cargo(&p, "generate-lockfile").run(); + cargo(&p, "fetch --locked").run(); + chmod_readonly(&paths::home(), true); + cargo(&p, "build").run(); + // make sure we un-readonly the files afterwards so "cargo clean" can 
remove them (#6934) + chmod_readonly(&paths::home(), false); + + fn chmod_readonly(path: &Path, readonly: bool) { + for entry in t!(path.read_dir()) { + let entry = t!(entry); + let path = entry.path(); + if t!(entry.file_type()).is_dir() { + chmod_readonly(&path, readonly); + } else { + set_readonly(&path, readonly); + } + } + set_readonly(path, readonly); + } + + fn set_readonly(path: &Path, readonly: bool) { + let mut perms = t!(path.metadata()).permissions(); + perms.set_readonly(readonly); + t!(fs::set_permissions(path, perms)); + } +} diff --git a/tests/testsuite/main.rs b/tests/testsuite/main.rs index 8af5858b373..2fc2006a98e 100644 --- a/tests/testsuite/main.rs +++ b/tests/testsuite/main.rs @@ -57,6 +57,7 @@ mod git_auth; mod git_gc; mod glob_targets; mod help; +mod http_registry; mod init; mod install; mod install_upgrade; diff --git a/tests/testsuite/search.rs b/tests/testsuite/search.rs index 0d239b3b455..94ba9fff17c 100644 --- a/tests/testsuite/search.rs +++ b/tests/testsuite/search.rs @@ -150,7 +150,7 @@ fn not_update() { paths::home().join(".cargo"), ); let lock = cfg.acquire_package_cache_lock().unwrap(); - let mut regsrc = RegistrySource::remote(sid, &HashSet::new(), &cfg); + let mut regsrc = RegistrySource::remote(sid, &HashSet::new(), &cfg).unwrap(); regsrc.update().unwrap(); drop(lock);