Skip to content

Commit 29fc71d

Browse files
committed
zstd support
Based on [1], but using compression level 3 for speed rather than 19 for competitiveness with xz. [1] rust-lang#109
1 parent 300b5ec commit 29fc71d

File tree

4 files changed

+47
-2
lines changed

4 files changed

+47
-2
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ walkdir = "2"
1818
xz2 = "0.1.4"
1919
num_cpus = "1"
2020
remove_dir_all = "0.5"
21+
zstd = { version = "0.10.0", features = ["zstdmt"] }
2122

2223
[dependencies.clap]
2324
features = ["derive"]

src/compression.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,21 @@ use flate2::{read::GzDecoder, write::GzEncoder};
33
use rayon::prelude::*;
44
use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr};
55
use xz2::{read::XzDecoder, write::XzEncoder};
6+
use zstd::stream::{read::Decoder as ZstdDecoder, write::Encoder as ZstdEncoder};
67

78
#[derive(Debug, Copy, Clone)]
89
pub enum CompressionFormat {
910
Gz,
1011
Xz,
12+
Zstd,
1113
}
1214

1315
impl CompressionFormat {
1416
pub(crate) fn detect_from_path(path: impl AsRef<Path>) -> Option<Self> {
1517
match path.as_ref().extension().and_then(|e| e.to_str()) {
1618
Some("gz") => Some(CompressionFormat::Gz),
1719
Some("xz") => Some(CompressionFormat::Xz),
20+
Some("zst") => Some(CompressionFormat::Zstd),
1821
_ => None,
1922
}
2023
}
@@ -23,6 +26,7 @@ impl CompressionFormat {
2326
match self {
2427
CompressionFormat::Gz => "gz",
2528
CompressionFormat::Xz => "xz",
29+
CompressionFormat::Zstd => "zst",
2630
}
2731
}
2832

@@ -48,6 +52,17 @@ impl CompressionFormat {
4852
.encoder()?;
4953
Box::new(XzEncoder::new_stream(file, stream))
5054
}
55+
CompressionFormat::Zstd => {
56+
// zstd's default compression level is 3, which is on par with gzip but much faster
57+
let mut enc = ZstdEncoder::new(file, 3).context("failed to initialize zstd encoder")?;
58+
// Long-distance matching provides a substantial benefit for our tarballs, and
59+
// actually makes compression *faster*.
60+
enc.long_distance_matching(true).context("zst long_distance_matching")?;
61+
// Enable multithreaded mode. zstd seems to be faster when using the number of
62+
// physical CPU cores rather than logical/SMT threads.
63+
enc.multithread(num_cpus::get_physical() as u32).context("zst multithreaded")?;
64+
Box::new(enc)
65+
}
5166
})
5267
}
5368

@@ -56,6 +71,7 @@ impl CompressionFormat {
5671
Ok(match self {
5772
CompressionFormat::Gz => Box::new(GzDecoder::new(file)),
5873
CompressionFormat::Xz => Box::new(XzDecoder::new(file)),
74+
CompressionFormat::Zstd => Box::new(ZstdDecoder::new(file)?),
5975
})
6076
}
6177
}
@@ -73,6 +89,7 @@ impl TryFrom<&'_ str> for CompressionFormats {
7389
match format.trim() {
7490
"gz" => parsed.push(CompressionFormat::Gz),
7591
"xz" => parsed.push(CompressionFormat::Xz),
92+
"zst" => parsed.push(CompressionFormat::Zstd),
7693
other => anyhow::bail!("unknown compression format: {}", other),
7794
}
7895
}
@@ -97,6 +114,7 @@ impl fmt::Display for CompressionFormats {
97114
fmt::Display::fmt(match format {
98115
CompressionFormat::Xz => "xz",
99116
CompressionFormat::Gz => "gz",
117+
CompressionFormat::Zstd => "zst",
100118
}, f)?;
101119
}
102120
Ok(())
@@ -113,6 +131,10 @@ impl CompressionFormats {
113131
pub(crate) fn iter(&self) -> impl Iterator<Item = CompressionFormat> + '_ {
114132
self.0.iter().map(|i| *i)
115133
}
134+
135+
pub(crate) fn len(&self) -> usize {
136+
self.0.len()
137+
}
116138
}
117139

118140
pub(crate) trait Encoder: Send + Write {
@@ -133,6 +155,13 @@ impl<W: Send + Write> Encoder for XzEncoder<W> {
133155
}
134156
}
135157

158+
impl<W: Send + Write> Encoder for ZstdEncoder<'_, W> {
159+
fn finish(mut self: Box<Self>) -> Result<(), Error> {
160+
ZstdEncoder::do_finish(self.as_mut()).context("failed to finish .zst file")?;
161+
Ok(())
162+
}
163+
}
164+
136165
pub(crate) struct CombinedEncoder {
137166
encoders: Vec<Box<dyn Encoder>>,
138167
}

src/tarballer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,13 @@ impl Tarballer {
4949
.context("failed to collect file paths")?;
5050
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));
5151

52-
// Write the tar into both encoded files. We write all directories
52+
// Write the tar into all encoded files. We write all directories
5353
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
5454
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
5555
let mut builder = Builder::new(buf);
5656

5757
let pool = rayon::ThreadPoolBuilder::new()
58-
.num_threads(2)
58+
.num_threads(self.compression_formats.len())
5959
.build()
6060
.unwrap();
6161
pool.install(move || {

test.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,21 @@ generate_compression_formats_multiple() {
12181218
}
12191219
runtest generate_compression_formats_multiple
12201220

1221+
generate_compression_formats_multiple_zst() {
1222+
try sh "$S/gen-installer.sh" \
1223+
--image-dir="$TEST_DIR/image1" \
1224+
--work-dir="$WORK_DIR" \
1225+
--output-dir="$OUT_DIR" \
1226+
--package-name="rustc" \
1227+
--component-name="rustc" \
1228+
--compression-formats="gz,zst"
1229+
1230+
try test -e "${OUT_DIR}/rustc.tar.gz"
1231+
try test ! -e "${OUT_DIR}/rustc.tar.xz"
1232+
try test -e "${OUT_DIR}/rustc.tar.zst"
1233+
}
1234+
runtest generate_compression_formats_multiple_zst
1235+
12211236
generate_compression_formats_error() {
12221237
expect_fail sh "$S/gen-installer.sh" \
12231238
--image-dir="$TEST_DIR/image1" \

0 commit comments

Comments
 (0)