Skip to content

Commit c98308e

Browse files
committed
feat: use stable hash from rustc-stable-hash
This helps `-Ztrim-paths` build a stable cross-platform path for the registry and git sources. Source files can then be found from the same path when debugging. See rust-lang#13171 (comment) A few caveats: * This will invalidate the current downloaded caches. Need to put this in the Cargo CHANGELOG. * As a consequence of changing how `SourceId` is hashed, the global cache tracker is also affected because Cargo writes source identifiers (e.g. `index.crates.io-6f17d22bba15001f`) to SQLite. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/global_cache_tracker.rs#L388-L391 * The performance of rustc-stable-hash is slightly worse than the old SipHasher in std on short things like `SourceId`, but better on long stuff like fingerprints. See appendix. StableHasher is used in several places (some might not be needed?): * Rebuild detection (fingerprints) * Rustc version, including all the CLI args running `rustc -vV`. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/util/rustc.rs#L326 * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/util/rustc.rs#L381 * Build caches * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/fingerprint/mod.rs#L1456 * Compute rustc `-C metadata` * stable hash for SourceId * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/package_id.rs#L207 * Also reads and hashes contents from custom target JSON file. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/compile_kind.rs#L81-L91 * `UnitInner::dep_hash` * This is to distinguish otherwise identical units that have different feature sets between normal and build dependencies. 
* https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/ops/cargo_compile/mod.rs#L627 * Hash file contents for `cargo package` to verify if files were modified before and after the build. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/ops/cargo_package.rs#L999 * Rustc diagnostics deduplication * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/job_queue/mod.rs#L311 * Places using `SourceId` identifier like `registry/src` path, and `-Zscript` target directories. Appendix -------- Benchmark on x86_64-unknown-linux-gnu ``` bench_hasher/RustcStableHasher/URL time: [33.843 ps 33.844 ps 33.845 ps] change: [-0.0167% -0.0049% +0.0072%] (p = 0.44 > 0.05) No change in performance detected. Found 10 outliers among 100 measurements (10.00%) 5 (5.00%) low severe 3 (3.00%) high mild 2 (2.00%) high severe bench_hasher/SipHasher/URL time: [18.954 ns 18.954 ns 18.955 ns] change: [-0.1281% -0.0951% -0.0644%] (p = 0.00 < 0.05) Change within noise threshold. Found 14 outliers among 100 measurements (14.00%) 3 (3.00%) low severe 4 (4.00%) low mild 3 (3.00%) high mild 4 (4.00%) high severe bench_hasher/RustcStableHasher/lorem ipsum time: [659.18 ns 659.20 ns 659.22 ns] change: [-0.0192% -0.0062% +0.0068%] (p = 0.34 > 0.05) No change in performance detected. Found 12 outliers among 100 measurements (12.00%) 4 (4.00%) low severe 3 (3.00%) low mild 3 (3.00%) high mild 2 (2.00%) high severe bench_hasher/SipHasher/lorem ipsum time: [1.2006 µs 1.2008 µs 1.2010 µs] change: [+0.0117% +0.0467% +0.0808%] (p = 0.01 < 0.05) Change within noise threshold. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild ```
1 parent 69e5959 commit c98308e

File tree

8 files changed

+67
-57
lines changed

8 files changed

+67
-57
lines changed

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ fn compute_metadata(
681681
}
682682

683683
Metadata {
684-
meta_hash: UnitHash(hasher.finish()),
684+
meta_hash: UnitHash(Hasher::finish(&hasher)),
685685
use_extra_filename: use_extra_filename(bcx, unit),
686686
}
687687
}

src/cargo/core/compiler/compile_kind.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,6 @@ impl CompileTarget {
195195
self.name.hash(&mut hasher);
196196
}
197197
}
198-
hasher.finish()
198+
Hasher::finish(&hasher)
199199
}
200200
}

src/cargo/core/compiler/fingerprint/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1552,7 +1552,7 @@ fn calculate_normal(
15521552
local: Mutex::new(local),
15531553
memoized_hash: Mutex::new(None),
15541554
metadata,
1555-
config: config.finish(),
1555+
config: Hasher::finish(&config),
15561556
compile_kind,
15571557
rustflags: extra_flags,
15581558
fs_status: FsStatus::Stale,

src/cargo/core/source_id.rs

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -782,70 +782,99 @@ mod tests {
782782
// Otherwise please just leave a comment in your PR as to why the hash value is
783783
// changing and why the old value can't be easily preserved.
784784
//
785-
// The hash value depends on endianness and bit-width, so we only run this test on
786-
// little-endian 64-bit CPUs (such as x86-64 and ARM64) where it matches the
787-
// well-known value.
785+
// The hash value should be stable across platforms, and doesn't depend on
786+
// endianness and bit-width. One caveat is that absolute paths are inherently
787+
// different on Windows than on Unix-like platforms. Unless we omit or strip
788+
// the prefix components (e.g. `C:`), there is no way to have a
789+
// cross-platform stable hash for absolute paths.
788790
#[test]
789-
#[cfg(all(target_endian = "little", target_pointer_width = "64"))]
790791
fn test_cratesio_hash() {
791792
let gctx = GlobalContext::default().unwrap();
792793
let crates_io = SourceId::crates_io(&gctx).unwrap();
793-
assert_eq!(crate::util::hex::short_hash(&crates_io), "1ecc6299db9ec823");
794+
assert_eq!(crate::util::hex::short_hash(&crates_io), "83d63c3e13aca8cc");
794795
}
795796

796797
// See the comment in `test_cratesio_hash`.
797798
//
798799
// Only test on non-Windows as paths on Windows will get different hashes.
799800
#[test]
800-
#[cfg(all(target_endian = "little", target_pointer_width = "64", not(windows)))]
801801
fn test_stable_hash() {
802802
use std::hash::Hasher;
803803
use std::path::Path;
804804

805+
use crate::util::StableHasher;
806+
807+
#[cfg(not(windows))]
808+
let ws_root = Path::new("/tmp/ws");
809+
#[cfg(windows)]
810+
let ws_root = Path::new(r"C:\\tmp\ws");
811+
805812
let gen_hash = |source_id: SourceId| {
806-
let mut hasher = std::collections::hash_map::DefaultHasher::new();
807-
source_id.stable_hash(Path::new("/tmp/ws"), &mut hasher);
808-
hasher.finish()
813+
let mut hasher = StableHasher::new();
814+
source_id.stable_hash(ws_root, &mut hasher);
815+
Hasher::finish(&hasher)
809816
};
810817

818+
let source_id = SourceId::crates_io(&GlobalContext::default().unwrap()).unwrap();
819+
assert_eq!(gen_hash(source_id), 7062945687441624357);
820+
assert_eq!(crate::util::hex::short_hash(&source_id), "25cdd57fae9f0462");
821+
811822
let url = "https://my-crates.io".into_url().unwrap();
812823
let source_id = SourceId::for_registry(&url).unwrap();
813-
assert_eq!(gen_hash(source_id), 18108075011063494626);
814-
assert_eq!(crate::util::hex::short_hash(&source_id), "fb60813d6cb8df79");
824+
assert_eq!(gen_hash(source_id), 8310250053664888498);
825+
assert_eq!(crate::util::hex::short_hash(&source_id), "b2d65deb64f05373");
815826

816827
let url = "https://your-crates.io".into_url().unwrap();
817828
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
818-
assert_eq!(gen_hash(source_id), 12862859764592646184);
819-
assert_eq!(crate::util::hex::short_hash(&source_id), "09c10fd0cbd74bce");
829+
assert_eq!(gen_hash(source_id), 14149534903000258933);
830+
assert_eq!(crate::util::hex::short_hash(&source_id), "755952de063f5dc4");
820831

821832
let url = "sparse+https://my-crates.io".into_url().unwrap();
822833
let source_id = SourceId::for_registry(&url).unwrap();
823-
assert_eq!(gen_hash(source_id), 8763561830438022424);
824-
assert_eq!(crate::util::hex::short_hash(&source_id), "d1ea0d96f6f759b5");
834+
assert_eq!(gen_hash(source_id), 16249512552851930162);
835+
assert_eq!(crate::util::hex::short_hash(&source_id), "327cfdbd92dd81e1");
825836

826837
let url = "sparse+https://your-crates.io".into_url().unwrap();
827838
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
828-
assert_eq!(gen_hash(source_id), 5159702466575482972);
829-
assert_eq!(crate::util::hex::short_hash(&source_id), "135d23074253cb78");
839+
assert_eq!(gen_hash(source_id), 6156697384053352292);
840+
assert_eq!(crate::util::hex::short_hash(&source_id), "64a713b6a6fb7055");
830841

831842
let url = "file:///tmp/ws/crate".into_url().unwrap();
832843
let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
833-
assert_eq!(gen_hash(source_id), 15332537265078583985);
834-
assert_eq!(crate::util::hex::short_hash(&source_id), "73a808694abda756");
835-
836-
let path = Path::new("/tmp/ws/crate");
844+
assert_eq!(gen_hash(source_id), 473480029881867801);
845+
assert_eq!(crate::util::hex::short_hash(&source_id), "199e591d94239206");
837846

847+
let path = &ws_root.join("crate");
838848
let source_id = SourceId::for_local_registry(path).unwrap();
839-
assert_eq!(gen_hash(source_id), 18446533307730842837);
840-
assert_eq!(crate::util::hex::short_hash(&source_id), "52a84cc73f6fd48b");
849+
#[cfg(not(windows))]
850+
{
851+
assert_eq!(gen_hash(source_id), 11515846423845066584);
852+
assert_eq!(crate::util::hex::short_hash(&source_id), "58d73c154f81d09f");
853+
}
854+
#[cfg(windows)]
855+
{
856+
assert_eq!(gen_hash(source_id), 6146331155906064276);
857+
assert_eq!(crate::util::hex::short_hash(&source_id), "946fb2239f274c55");
858+
}
841859

842860
let source_id = SourceId::for_path(path).unwrap();
843-
assert_eq!(gen_hash(source_id), 8764714075439899829);
844-
assert_eq!(crate::util::hex::short_hash(&source_id), "e1ddd48578620fc1");
861+
assert_eq!(gen_hash(source_id), 215644081443634269);
862+
#[cfg(not(windows))]
863+
assert_eq!(crate::util::hex::short_hash(&source_id), "64bace89c92b101f");
864+
#[cfg(windows)]
865+
assert_eq!(crate::util::hex::short_hash(&source_id), "01e1e6c391813fb6");
845866

846867
let source_id = SourceId::for_directory(path).unwrap();
847-
assert_eq!(gen_hash(source_id), 17459999773908528552);
848-
assert_eq!(crate::util::hex::short_hash(&source_id), "6568fe2c2fab5bfe");
868+
#[cfg(not(windows))]
869+
{
870+
assert_eq!(gen_hash(source_id), 6127590343904940368);
871+
assert_eq!(crate::util::hex::short_hash(&source_id), "505191d1f3920955");
872+
}
873+
#[cfg(windows)]
874+
{
875+
assert_eq!(gen_hash(source_id), 10423446877655960172);
876+
assert_eq!(crate::util::hex::short_hash(&source_id), "6c8ad69db585a790");
877+
}
849878
}
850879

851880
#[test]

src/cargo/ops/cargo_compile/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ fn traverse_and_share(
656656
.collect();
657657
// Here, we have recursively traversed this unit's dependencies, and hashed them: we can
658658
// finalize the dep hash.
659-
let new_dep_hash = dep_hash.finish();
659+
let new_dep_hash = Hasher::finish(&dep_hash);
660660

661661
// This is the key part of the sharing process: if the unit is a runtime dependency, whose
662662
// target is the same as the host, we canonicalize the compile kind to `CompileKind::Host`.

src/cargo/util/hasher.rs

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,5 @@
1-
//! Implementation of a hasher that produces the same values across releases.
1+
//! A hasher that produces the same values across releases and platforms.
22
//!
3-
//! The hasher should be fast and have a low chance of collisions (but is not
4-
//! sufficient for cryptographic purposes).
5-
#![allow(deprecated)]
3+
//! This is a wrapper around [`rustc_stable_hash::StableHasher`].
64
7-
use std::hash::{Hasher, SipHasher};
8-
9-
pub struct StableHasher(SipHasher);
10-
11-
impl StableHasher {
12-
pub fn new() -> StableHasher {
13-
StableHasher(SipHasher::new())
14-
}
15-
}
16-
17-
impl Hasher for StableHasher {
18-
fn finish(&self) -> u64 {
19-
self.0.finish()
20-
}
21-
fn write(&mut self, bytes: &[u8]) {
22-
self.0.write(bytes)
23-
}
24-
}
5+
pub use rustc_stable_hash::StableSipHasher128 as StableHasher;

src/cargo/util/hex.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub fn to_hex(num: u64) -> String {
1010
pub fn hash_u64<H: Hash>(hashable: H) -> u64 {
1111
let mut hasher = StableHasher::new();
1212
hashable.hash(&mut hasher);
13-
hasher.finish()
13+
Hasher::finish(&hasher)
1414
}
1515

1616
pub fn hash_u64_file(mut file: &File) -> std::io::Result<u64> {
@@ -23,7 +23,7 @@ pub fn hash_u64_file(mut file: &File) -> std::io::Result<u64> {
2323
}
2424
hasher.write(&buf[..n]);
2525
}
26-
Ok(hasher.finish())
26+
Ok(Hasher::finish(&hasher))
2727
}
2828

2929
pub fn short_hash<H: Hash>(hashable: &H) -> String {

src/cargo/util/rustc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ fn rustc_fingerprint(
381381
_ => (),
382382
}
383383

384-
Ok(hasher.finish())
384+
Ok(Hasher::finish(&hasher))
385385
}
386386

387387
fn process_fingerprint(cmd: &ProcessBuilder, extra_fingerprint: u64) -> u64 {
@@ -391,5 +391,5 @@ fn process_fingerprint(cmd: &ProcessBuilder, extra_fingerprint: u64) -> u64 {
391391
let mut env = cmd.get_envs().iter().collect::<Vec<_>>();
392392
env.sort_unstable();
393393
env.hash(&mut hasher);
394-
hasher.finish()
394+
Hasher::finish(&hasher)
395395
}

0 commit comments

Comments
 (0)