Skip to content

Commit 35c55a9

Browse files
committed
Auto merge of #7368 - alexcrichton:canonical-urls-omg, r=ehuss
Work with canonical URLs in `[patch]` This commit addresses an issue with how the resolver processes `[patch]` annotations in manifests and lock files. Previously the resolver would use the raw `Url` coming out of a manifest, but the rest of resolution, when comparing `SourceId`, uses a canonical form of a `Url` rather than the actual raw `Url`. This ended up causing discrepancies like those found in #7282. To fix the issue, all `patch` intermediate storage in the resolver uses a newly-added `CanonicalUrl` type instead of a `Url`. This `CanonicalUrl` is then also used throughout the codebase, and all lookups in the resolver are switched to using `CanonicalUrl` instead of `Url`, which... Closes #7282
2 parents 658bde1 + e545412 commit 35c55a9

File tree

9 files changed

+200
-91
lines changed

9 files changed

+200
-91
lines changed

src/cargo/core/registry.rs

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::core::PackageSet;
99
use crate::core::{Dependency, PackageId, Source, SourceId, SourceMap, Summary};
1010
use crate::sources::config::SourceConfigMap;
1111
use crate::util::errors::{CargoResult, CargoResultExt};
12-
use crate::util::{profile, Config};
12+
use crate::util::{profile, CanonicalUrl, Config};
1313

1414
/// Source of information about a group of packages.
1515
///
@@ -75,9 +75,9 @@ pub struct PackageRegistry<'cfg> {
7575
yanked_whitelist: HashSet<PackageId>,
7676
source_config: SourceConfigMap<'cfg>,
7777

78-
patches: HashMap<Url, Vec<Summary>>,
78+
patches: HashMap<CanonicalUrl, Vec<Summary>>,
7979
patches_locked: bool,
80-
patches_available: HashMap<Url, Vec<PackageId>>,
80+
patches_available: HashMap<CanonicalUrl, Vec<PackageId>>,
8181
}
8282

8383
/// A map of all "locked packages" which is filled in when parsing a lock file
@@ -230,6 +230,8 @@ impl<'cfg> PackageRegistry<'cfg> {
230230
/// `query` until `lock_patches` is called below, which should be called
231231
/// once all patches have been added.
232232
pub fn patch(&mut self, url: &Url, deps: &[Dependency]) -> CargoResult<()> {
233+
let canonical = CanonicalUrl::new(url)?;
234+
233235
// First up we need to actually resolve each `deps` specification to
234236
// precisely one summary. We're not using the `query` method below as it
235237
// internally uses maps we're building up as part of this method
@@ -284,7 +286,7 @@ impl<'cfg> PackageRegistry<'cfg> {
284286
url
285287
)
286288
}
287-
if summary.package_id().source_id().url() == url {
289+
if *summary.package_id().source_id().canonical_url() == canonical {
288290
failure::bail!(
289291
"patch for `{}` in `{}` points to the same source, but \
290292
patches must point to different sources",
@@ -317,8 +319,8 @@ impl<'cfg> PackageRegistry<'cfg> {
317319
// `lock` method) and otherwise store the unlocked summaries in
318320
// `patches` to get locked in a future call to `lock_patches`.
319321
let ids = unlocked_summaries.iter().map(|s| s.package_id()).collect();
320-
self.patches_available.insert(url.clone(), ids);
321-
self.patches.insert(url.clone(), unlocked_summaries);
322+
self.patches_available.insert(canonical.clone(), ids);
323+
self.patches.insert(canonical, unlocked_summaries);
322324

323325
Ok(())
324326
}
@@ -340,8 +342,11 @@ impl<'cfg> PackageRegistry<'cfg> {
340342
self.patches_locked = true;
341343
}
342344

343-
pub fn patches(&self) -> &HashMap<Url, Vec<Summary>> {
344-
&self.patches
345+
pub fn patches(&self) -> Vec<Summary> {
346+
self.patches
347+
.values()
348+
.flat_map(|v| v.iter().cloned())
349+
.collect()
345350
}
346351

347352
fn load(&mut self, source_id: SourceId, kind: Kind) -> CargoResult<()> {
@@ -472,7 +477,7 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
472477
// This means that `dep.matches(..)` will always return false, when
473478
// what we really care about is the name/version match.
474479
let mut patches = Vec::<Summary>::new();
475-
if let Some(extra) = self.patches.get(dep.source_id().url()) {
480+
if let Some(extra) = self.patches.get(dep.source_id().canonical_url()) {
476481
patches.extend(
477482
extra
478483
.iter()
@@ -605,7 +610,11 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
605610
}
606611
}
607612

608-
fn lock(locked: &LockedMap, patches: &HashMap<Url, Vec<PackageId>>, summary: Summary) -> Summary {
613+
fn lock(
614+
locked: &LockedMap,
615+
patches: &HashMap<CanonicalUrl, Vec<PackageId>>,
616+
summary: Summary,
617+
) -> Summary {
609618
let pair = locked
610619
.get(&summary.source_id())
611620
.and_then(|map| map.get(&*summary.name()))
@@ -669,7 +678,7 @@ fn lock(locked: &LockedMap, patches: &HashMap<Url, Vec<PackageId>>, summary: Sum
669678
// map, and we see if `id` is contained in the list of patches
670679
// for that url. If it is then this lock is still valid,
671680
// otherwise the lock is no longer valid.
672-
match patches.get(dep.source_id().url()) {
681+
match patches.get(dep.source_id().canonical_url()) {
673682
Some(list) => list.contains(&id),
674683
None => false,
675684
}

src/cargo/core/resolver/resolve.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ use std::collections::{HashMap, HashSet};
33
use std::fmt;
44
use std::iter::FromIterator;
55

6-
use url::Url;
7-
86
use crate::core::dependency::Kind;
97
use crate::core::{Dependency, PackageId, PackageIdSpec, Summary, Target};
108
use crate::util::errors::CargoResult;
@@ -114,8 +112,8 @@ impl Resolve {
114112
self.graph.path_to_top(pkg)
115113
}
116114

117-
pub fn register_used_patches(&mut self, patches: &HashMap<Url, Vec<Summary>>) {
118-
for summary in patches.values().flat_map(|v| v) {
115+
pub fn register_used_patches(&mut self, patches: &[Summary]) {
116+
for summary in patches {
119117
if self.iter().any(|id| id == summary.package_id()) {
120118
continue;
121119
}

src/cargo/core/source/source_id.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@ use url::Url;
1515

1616
use crate::core::PackageId;
1717
use crate::ops;
18-
use crate::sources::git;
1918
use crate::sources::DirectorySource;
2019
use crate::sources::{GitSource, PathSource, RegistrySource, CRATES_IO_INDEX};
21-
use crate::util::{CargoResult, Config, IntoUrl};
20+
use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl};
2221

2322
lazy_static::lazy_static! {
2423
static ref SOURCE_ID_CACHE: Mutex<HashSet<&'static SourceIdInner>> = Mutex::new(HashSet::new());
@@ -34,8 +33,8 @@ pub struct SourceId {
3433
struct SourceIdInner {
3534
/// The source URL.
3635
url: Url,
37-
/// The result of `git::canonicalize_url()` on `url` field.
38-
canonical_url: Url,
36+
/// The canonical version of the above url
37+
canonical_url: CanonicalUrl,
3938
/// The source kind.
4039
kind: Kind,
4140
/// For example, the exact Git revision of the specified branch for a Git Source.
@@ -80,7 +79,7 @@ impl SourceId {
8079
fn new(kind: Kind, url: Url) -> CargoResult<SourceId> {
8180
let source_id = SourceId::wrap(SourceIdInner {
8281
kind,
83-
canonical_url: git::canonicalize_url(&url)?,
82+
canonical_url: CanonicalUrl::new(&url)?,
8483
url,
8584
precise: None,
8685
name: None,
@@ -216,7 +215,7 @@ impl SourceId {
216215
let url = config.get_registry_index(key)?;
217216
Ok(SourceId::wrap(SourceIdInner {
218217
kind: Kind::Registry,
219-
canonical_url: git::canonicalize_url(&url)?,
218+
canonical_url: CanonicalUrl::new(&url)?,
220219
url,
221220
precise: None,
222221
name: Some(key.to_string()),
@@ -228,6 +227,12 @@ impl SourceId {
228227
&self.inner.url
229228
}
230229

230+
/// Gets the canonical URL of this source, used for internal comparison
231+
/// purposes.
232+
pub fn canonical_url(&self) -> &CanonicalUrl {
233+
&self.inner.canonical_url
234+
}
235+
231236
pub fn display_index(self) -> String {
232237
if self.is_default_registry() {
233238
"crates.io index".to_string()
@@ -508,7 +513,7 @@ impl Hash for SourceId {
508513
fn hash<S: hash::Hasher>(&self, into: &mut S) {
509514
self.inner.kind.hash(into);
510515
match self.inner.kind {
511-
Kind::Git(_) => self.inner.canonical_url.as_str().hash(into),
516+
Kind::Git(_) => self.inner.canonical_url.hash(into),
512517
_ => self.inner.url.as_str().hash(into),
513518
}
514519
}

src/cargo/ops/resolve.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ pub fn resolve_with_previous<'cfg>(
343343
Some(ws.config()),
344344
ws.features().require(Feature::public_dependency()).is_ok(),
345345
)?;
346-
resolved.register_used_patches(registry.patches());
346+
resolved.register_used_patches(&registry.patches());
347347
if register_patches {
348348
// It would be good if this warning was more targeted and helpful
349349
// (such as showing close candidates that failed to match). However,

src/cargo/sources/git/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
pub use self::source::{canonicalize_url, GitSource};
1+
pub use self::source::GitSource;
22
pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote, GitRevision};
33
mod source;
44
mod utils;

src/cargo/sources/git/source.rs

Lines changed: 24 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ impl<'cfg> GitSource<'cfg> {
2727
assert!(source_id.is_git(), "id is not git, id={}", source_id);
2828

2929
let remote = GitRemote::new(source_id.url());
30-
let ident = ident(source_id.url())?;
30+
let ident = ident(&source_id);
3131

3232
let reference = match source_id.precise() {
3333
Some(s) => GitReference::Rev(s.to_string()),
@@ -59,58 +59,17 @@ impl<'cfg> GitSource<'cfg> {
5959
}
6060
}
6161

62-
fn ident(url: &Url) -> CargoResult<String> {
63-
let url = canonicalize_url(url)?;
64-
let ident = url
62+
fn ident(id: &SourceId) -> String {
63+
let ident = id
64+
.canonical_url()
65+
.raw_canonicalized_url()
6566
.path_segments()
66-
.and_then(|mut s| s.next_back())
67+
.and_then(|s| s.rev().next())
6768
.unwrap_or("");
6869

6970
let ident = if ident == "" { "_empty" } else { ident };
7071

71-
Ok(format!("{}-{}", ident, short_hash(&url)))
72-
}
73-
74-
// Some hacks and heuristics for making equivalent URLs hash the same.
75-
pub fn canonicalize_url(url: &Url) -> CargoResult<Url> {
76-
let mut url = url.clone();
77-
78-
// cannot-be-a-base-urls (e.g., `github.com:rust-lang-nursery/rustfmt.git`)
79-
// are not supported.
80-
if url.cannot_be_a_base() {
81-
failure::bail!(
82-
"invalid url `{}`: cannot-be-a-base-URLs are not supported",
83-
url
84-
)
85-
}
86-
87-
// Strip a trailing slash.
88-
if url.path().ends_with('/') {
89-
url.path_segments_mut().unwrap().pop_if_empty();
90-
}
91-
92-
// HACK: for GitHub URLs specifically, just lower-case
93-
// everything. GitHub treats both the same, but they hash
94-
// differently, and we're gonna be hashing them. This wants a more
95-
// general solution, and also we're almost certainly not using the
96-
// same case conversion rules that GitHub does. (See issue #84.)
97-
if url.host_str() == Some("github.com") {
98-
url.set_scheme("https").unwrap();
99-
let path = url.path().to_lowercase();
100-
url.set_path(&path);
101-
}
102-
103-
// Repos can generally be accessed with or without `.git` extension.
104-
let needs_chopping = url.path().ends_with(".git");
105-
if needs_chopping {
106-
let last = {
107-
let last = url.path_segments().unwrap().next_back().unwrap();
108-
last[..last.len() - 4].to_owned()
109-
};
110-
url.path_segments_mut().unwrap().pop().push(&last);
111-
}
112-
113-
Ok(url)
72+
format!("{}-{}", ident, short_hash(id.canonical_url()))
11473
}
11574

11675
impl<'cfg> Debug for GitSource<'cfg> {
@@ -241,56 +200,54 @@ impl<'cfg> Source for GitSource<'cfg> {
241200
#[cfg(test)]
242201
mod test {
243202
use super::ident;
203+
use crate::core::{GitReference, SourceId};
244204
use crate::util::IntoUrl;
245-
use url::Url;
246205

247206
#[test]
248207
pub fn test_url_to_path_ident_with_path() {
249-
let ident = ident(&url("https://github.com/carlhuda/cargo")).unwrap();
208+
let ident = ident(&src("https://github.com/carlhuda/cargo"));
250209
assert!(ident.starts_with("cargo-"));
251210
}
252211

253212
#[test]
254213
pub fn test_url_to_path_ident_without_path() {
255-
let ident = ident(&url("https://github.com")).unwrap();
214+
let ident = ident(&src("https://github.com"));
256215
assert!(ident.starts_with("_empty-"));
257216
}
258217

259218
#[test]
260219
fn test_canonicalize_idents_by_stripping_trailing_url_slash() {
261-
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston/")).unwrap();
262-
let ident2 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
220+
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston/"));
221+
let ident2 = ident(&src("https://github.com/PistonDevelopers/piston"));
263222
assert_eq!(ident1, ident2);
264223
}
265224

266225
#[test]
267226
fn test_canonicalize_idents_by_lowercasing_github_urls() {
268-
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
269-
let ident2 = ident(&url("https://github.com/pistondevelopers/piston")).unwrap();
227+
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
228+
let ident2 = ident(&src("https://github.com/pistondevelopers/piston"));
270229
assert_eq!(ident1, ident2);
271230
}
272231

273232
#[test]
274233
fn test_canonicalize_idents_by_stripping_dot_git() {
275-
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
276-
let ident2 = ident(&url("https://github.com/PistonDevelopers/piston.git")).unwrap();
234+
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
235+
let ident2 = ident(&src("https://github.com/PistonDevelopers/piston.git"));
277236
assert_eq!(ident1, ident2);
278237
}
279238

280239
#[test]
281240
fn test_canonicalize_idents_different_protocols() {
282-
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
283-
let ident2 = ident(&url("git://github.com/PistonDevelopers/piston")).unwrap();
241+
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
242+
let ident2 = ident(&src("git://github.com/PistonDevelopers/piston"));
284243
assert_eq!(ident1, ident2);
285244
}
286245

287-
#[test]
288-
fn test_canonicalize_cannot_be_a_base_urls() {
289-
assert!(ident(&url("github.com:PistonDevelopers/piston")).is_err());
290-
assert!(ident(&url("google.com:PistonDevelopers/piston")).is_err());
291-
}
292-
293-
fn url(s: &str) -> Url {
294-
s.into_url().unwrap()
246+
fn src(s: &str) -> SourceId {
247+
SourceId::for_git(
248+
&s.into_url().unwrap(),
249+
GitReference::Branch("master".to_string()),
250+
)
251+
.unwrap()
295252
}
296253
}

0 commit comments

Comments
 (0)