Skip to content

Commit ff0dc0d

Browse files
committed
Avoid blocking on CloseHandle
On Windows closing a file involves CloseHandle, which can be quite slow (6+ms) for various reasons, including circumstances outside our control such as virus scanners, content indexing, device driver cache write-back synchronisation and so forth. Rather than having a super long list of all the things users need to do to optimise the performance of CloseHandle, use a small threadpool to avoid blocking package extraction when closing file handles. This does run a risk of resource exhaustion, but as we only have 20k files in the largest package today that should not be a problem in practice. https://www.mercurial-scm.org/pipermail/mercurial-devel/2016-January/078404.html provided inspiration for this. My benchmark system went from 21/22s to 11s with this change with both 4 or 8 threads.
1 parent fd6c31f commit ff0dc0d

File tree

5 files changed

+167
-20
lines changed

5 files changed

+167
-20
lines changed

Cargo.lock

Lines changed: 17 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,11 @@ scopeguard = "1"
4343
semver = "0.9"
4444
sha2 = "0.8"
4545
strsim = "0.9.1"
46-
tar = "0.4"
46+
tar = "0.4.26"
4747
tempdir = "0.3.4"
4848
# FIXME(issue #1788)
4949
term = "=0.5.1"
50+
threadpool = "1"
5051
time = "0.1.34"
5152
toml = "0.5"
5253
url = "1"

src/dist/component/package.rs

Lines changed: 140 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::dist::component::transaction::*;
77

88
use crate::dist::temp;
99
use crate::errors::*;
10+
use crate::utils::notifications::Notification;
1011
use crate::utils::utils;
1112

1213
use std::collections::HashSet;
@@ -194,13 +195,17 @@ fn set_file_perms(_dest_path: &Path, _src_path: &Path) -> Result<()> {
194195
pub struct TarPackage<'a>(DirectoryPackage, temp::Dir<'a>);
195196

196197
impl<'a> TarPackage<'a> {
197-
pub fn new<R: Read>(stream: R, temp_cfg: &'a temp::Cfg) -> Result<Self> {
198+
pub fn new<R: Read>(
199+
stream: R,
200+
temp_cfg: &'a temp::Cfg,
201+
notify_handler: Option<&'a dyn Fn(Notification<'_>)>,
202+
) -> Result<Self> {
198203
let temp_dir = temp_cfg.new_directory()?;
199204
let mut archive = tar::Archive::new(stream);
200205
// The rust-installer packages unpack to a directory called
201206
// $pkgname-$version-$target. Skip that directory when
202207
// unpacking.
203-
unpack_without_first_dir(&mut archive, &*temp_dir)?;
208+
unpack_without_first_dir(&mut archive, &*temp_dir, notify_handler)?;
204209

205210
Ok(TarPackage(
206211
DirectoryPackage::new(temp_dir.to_owned(), false)?,
@@ -209,11 +214,122 @@ impl<'a> TarPackage<'a> {
209214
}
210215
}
211216

212-
fn unpack_without_first_dir<R: Read>(archive: &mut tar::Archive<R>, path: &Path) -> Result<()> {
217+
#[cfg(windows)]
218+
mod unpacker {
219+
use std::sync::atomic::{AtomicUsize, Ordering};
220+
use std::sync::Arc;
221+
use threadpool;
222+
223+
use crate::utils::notifications::Notification;
224+
225+
pub struct Unpacker<'a> {
226+
n_files: Arc<AtomicUsize>,
227+
pool: threadpool::ThreadPool,
228+
notify_handler: Option<&'a dyn Fn(Notification<'_>)>,
229+
}
230+
231+
impl<'a> Unpacker<'a> {
232+
pub fn new(notify_handler: Option<&'a dyn Fn(Notification<'_>)>) -> Unpacker {
233+
// Defaults to hardware thread count threads; this is suitable for
234+
// our needs as IO bound operations tend to show up as write latencies
235+
// rather than close latencies, so we don't need to look at
236+
// more threads to get more IO dispatched at this stage in the process.
237+
let pool = threadpool::Builder::new()
238+
.thread_name("CloseHandle".into())
239+
.build();
240+
Unpacker {
241+
n_files: Arc::new(AtomicUsize::new(0)),
242+
pool: pool,
243+
notify_handler: notify_handler,
244+
}
245+
}
246+
247+
pub fn handle(&mut self, unpacked: tar::Unpacked) {
248+
if let tar::Unpacked::File(f) = unpacked {
249+
self.n_files.fetch_add(1, Ordering::Relaxed);
250+
let n_files = self.n_files.clone();
251+
self.pool.execute(move || {
252+
drop(f);
253+
n_files.fetch_sub(1, Ordering::Relaxed);
254+
});
255+
}
256+
}
257+
}
258+
259+
impl<'a> Drop for Unpacker<'a> {
260+
fn drop(&mut self) {
261+
// Some explanation is in order. Even though the tar we are reading from (if
262+
// any) will have had its FileWithProgress download tracking
263+
// completed before we hit drop, that is not true if we are unwinding due to a
264+
// failure, where the logical ownership of the progress bar is
265+
// ambiguous, and as the tracker itself is abstracted out behind
266+
// notifications etc we cannot just query for that. So: we assume no
267+
// more reads of the underlying tar will take place: either the
268+
// error unwinding will stop reads, or we completed; either way, we
269+
// notify finished to the tracker to force a reset to zero; we set
270+
// the units to files, show our progress, and set our units back
271+
// afterwards. The largest archives today - rust docs - have ~20k
272+
// items, and the download tracker's progress is confounded with
273+
// actual handling of data today, we synthesis a data buffer and
274+
// pretend to have bytes to deliver.
275+
self.notify_handler
276+
.map(|handler| handler(Notification::DownloadFinished));
277+
self.notify_handler
278+
.map(|handler| handler(Notification::DownloadPushUnits("handles")));
279+
let mut prev_files = self.n_files.load(Ordering::Relaxed);
280+
self.notify_handler.map(|handler| {
281+
handler(Notification::DownloadContentLengthReceived(
282+
prev_files as u64,
283+
))
284+
});
285+
if prev_files > 50 {
286+
println!("Closing {} deferred file handles", prev_files);
287+
}
288+
let buf: Vec<u8> = vec![0; prev_files];
289+
assert!(32767 > prev_files);
290+
let mut current_files = prev_files;
291+
while current_files != 0 {
292+
use std::thread::sleep;
293+
sleep(std::time::Duration::from_millis(100));
294+
prev_files = current_files;
295+
current_files = self.n_files.load(Ordering::Relaxed);
296+
let step_count = prev_files - current_files;
297+
self.notify_handler.map(|handler| {
298+
handler(Notification::DownloadDataReceived(&buf[0..step_count]))
299+
});
300+
}
301+
self.pool.join();
302+
self.notify_handler
303+
.map(|handler| handler(Notification::DownloadFinished));
304+
self.notify_handler
305+
.map(|handler| handler(Notification::DownloadPopUnits));
306+
}
307+
}
308+
}
309+
310+
#[cfg(not(windows))]
311+
mod unpacker {
312+
use crate::utils::notifications::Notification;
313+
pub struct Unpacker {}
314+
impl Unpacker {
315+
pub fn new<'a>(_notify_handler: Option<&'a dyn Fn(Notification<'_>)>) -> Unpacker {
316+
Unpacker {}
317+
}
318+
pub fn handle(&mut self, _unpacked: tar::Unpacked) {}
319+
}
320+
}
321+
322+
fn unpack_without_first_dir<'a, R: Read>(
323+
archive: &mut tar::Archive<R>,
324+
path: &Path,
325+
notify_handler: Option<&'a dyn Fn(Notification<'_>)>,
326+
) -> Result<()> {
327+
let mut unpacker = unpacker::Unpacker::new(notify_handler);
213328
let entries = archive
214329
.entries()
215330
.chain_err(|| ErrorKind::ExtractingPackage)?;
216331
let mut checked_parents: HashSet<PathBuf> = HashSet::new();
332+
217333
for entry in entries {
218334
let mut entry = entry.chain_err(|| ErrorKind::ExtractingPackage)?;
219335
let relpath = {
@@ -249,6 +365,7 @@ fn unpack_without_first_dir<R: Read>(archive: &mut tar::Archive<R>, path: &Path)
249365
entry.set_preserve_mtime(false);
250366
entry
251367
.unpack(&full_path)
368+
.map(|unpacked| unpacker.handle(unpacked))
252369
.chain_err(|| ErrorKind::ExtractingPackage)?;
253370
}
254371

@@ -277,9 +394,17 @@ impl<'a> Package for TarPackage<'a> {
277394
pub struct TarGzPackage<'a>(TarPackage<'a>);
278395

279396
impl<'a> TarGzPackage<'a> {
280-
pub fn new<R: Read>(stream: R, temp_cfg: &'a temp::Cfg) -> Result<Self> {
397+
pub fn new<R: Read>(
398+
stream: R,
399+
temp_cfg: &'a temp::Cfg,
400+
notify_handler: Option<&'a dyn Fn(Notification<'_>)>,
401+
) -> Result<Self> {
281402
let stream = flate2::read::GzDecoder::new(stream);
282-
Ok(TarGzPackage(TarPackage::new(stream, temp_cfg)?))
403+
Ok(TarGzPackage(TarPackage::new(
404+
stream,
405+
temp_cfg,
406+
notify_handler,
407+
)?))
283408
}
284409
}
285410

@@ -305,9 +430,17 @@ impl<'a> Package for TarGzPackage<'a> {
305430
pub struct TarXzPackage<'a>(TarPackage<'a>);
306431

307432
impl<'a> TarXzPackage<'a> {
308-
pub fn new<R: Read>(stream: R, temp_cfg: &'a temp::Cfg) -> Result<Self> {
433+
pub fn new<R: Read>(
434+
stream: R,
435+
temp_cfg: &'a temp::Cfg,
436+
notify_handler: Option<&'a dyn Fn(Notification<'_>)>,
437+
) -> Result<Self> {
309438
let stream = xz2::read::XzDecoder::new(stream);
310-
Ok(TarXzPackage(TarPackage::new(stream, temp_cfg)?))
439+
Ok(TarXzPackage(TarPackage::new(
440+
stream,
441+
temp_cfg,
442+
notify_handler,
443+
)?))
311444
}
312445
}
313446

src/dist/manifestation.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,20 +204,20 @@ impl Manifestation {
204204
component.target.as_ref(),
205205
));
206206

207-
let gz;
208-
let xz;
209207
let notification_converter = |notification: crate::utils::Notification<'_>| {
210208
notify_handler(notification.into());
211209
};
210+
let gz;
211+
let xz;
212212
let reader =
213213
utils::FileReaderWithProgress::new_file(&installer_file, &notification_converter)?;
214214
let package: &dyn Package = match format {
215215
Format::Gz => {
216-
gz = TarGzPackage::new(reader, temp_cfg)?;
216+
gz = TarGzPackage::new(reader, temp_cfg, Some(&notification_converter))?;
217217
&gz
218218
}
219219
Format::Xz => {
220-
xz = TarXzPackage::new(reader, temp_cfg)?;
220+
xz = TarXzPackage::new(reader, temp_cfg, Some(&notification_converter))?;
221221
&xz
222222
}
223223
};
@@ -407,7 +407,8 @@ impl Manifestation {
407407
};
408408
let reader =
409409
utils::FileReaderWithProgress::new_file(&installer_file, &notification_converter)?;
410-
let package: &dyn Package = &TarGzPackage::new(reader, temp_cfg)?;
410+
let package: &dyn Package =
411+
&TarGzPackage::new(reader, temp_cfg, Some(&notification_converter))?;
411412

412413
for component in package.components() {
413414
tx = package.install(&self.installation, &component, None, tx)?;

src/install.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ impl<'a> InstallMethod<'a> {
8989
notify_handler(notification.into());
9090
};
9191
let reader = utils::FileReaderWithProgress::new_file(&src, &notification_converter)?;
92-
let package: &dyn Package = &TarGzPackage::new(reader, temp_cfg)?;
92+
let package: &dyn Package =
93+
&TarGzPackage::new(reader, temp_cfg, Some(&notification_converter))?;
9394

9495
let mut tx = Transaction::new(prefix.clone(), temp_cfg, notify_handler);
9596

0 commit comments

Comments
 (0)