Skip to content

Commit b6a098d

Browse files
authored
[sled-agent] Replace du(1) with a Rust function (#6482)
Currently, the `zone_bundle::disk_usage` function in sled-agent shells out to `du(1)` and parses its output. This works fine, but it's a bit flaky, especially when running the corresponding tests on "weird" platforms such as NixOS, where the `du(1)` binary doesn't live in `/usr/bin/du` (see #6479). This commit replaces the code that shells out to `/usr/bin/du` with a pure-Rust rewrite. There's a unit test that asserts that the disk usage calculated by the new implementation matches the value calculated by `du(1)`, with code for handling the potential inaccuracy due to `du(1)` operating in terms of 512B blocks on macOS. Closes #6479
1 parent 165d42a commit b6a098d

File tree

3 files changed

+99
-62
lines changed

3 files changed

+99
-62
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sled-agent/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ zone.workspace = true
9393
static_assertions.workspace = true
9494
omicron-workspace-hack.workspace = true
9595
slog-error-chain.workspace = true
96+
walkdir.workspace = true
9697

9798
[target.'cfg(target_os = "illumos")'.dependencies]
9899
opte-ioctl.workspace = true

sled-agent/src/zone_bundle.rs

Lines changed: 97 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,11 @@ pub enum BundleError {
582582

583583
#[error("Instance is terminating")]
584584
InstanceTerminating,
585+
586+
/// The `walkdir` crate's errors already include the path which could not be
587+
/// read (if one exists), so we can just wrap them directly.
588+
#[error(transparent)]
589+
WalkDir(#[from] walkdir::Error),
585590
}
586591

587592
// Helper function to write an array of bytes into the tar archive, with
@@ -1533,7 +1538,7 @@ async fn compute_bundle_utilization(
15331538
// TODO-correctness: This takes into account the directories themselves,
15341539
// and may be not quite what we want. But it is very easy and pretty
15351540
// close.
1536-
let bytes_used = disk_usage(dir).await?;
1541+
let bytes_used = dir_size(dir).await?;
15371542
debug!(log, "computed bytes used"; "bytes_used" => bytes_used);
15381543
out.insert(
15391544
dir.clone(),
@@ -1545,61 +1550,27 @@ async fn compute_bundle_utilization(
15451550

15461551
// Return the number of bytes occupied by the provided directory.
15471552
//
1548-
// This returns an error if:
1549-
//
1550-
// - The "du" command fails
1551-
// - Parsing stdout fails
1552-
// - Parsing the actual size as a u64 fails
1553-
async fn disk_usage(path: &Utf8PathBuf) -> Result<u64, BundleError> {
1554-
// Each OS implements slightly different `du` options.
1555-
//
1556-
// Linux and illumos support the "apparent" size in bytes, though using
1557-
// different options. macOS doesn't support bytes at all, and has a minimum
1558-
// block size of 512.
1559-
//
1560-
// We'll suffer the lower resolution on macOS, and get higher resolution on
1561-
// the others.
1562-
cfg_if::cfg_if! {
1563-
if #[cfg(target_os = "illumos")] {
1564-
const BLOCK_SIZE: u64 = 1;
1565-
const DU_ARG: &str = "-A";
1566-
} else if #[cfg(target_os = "linux")] {
1567-
const BLOCK_SIZE: u64 = 1;
1568-
const DU_ARG: &str = "-b";
1569-
} else if #[cfg(target_os = "macos")] {
1570-
const BLOCK_SIZE: u64 = 512;
1571-
const DU_ARG: &str = "-k";
1572-
} else {
1573-
compile_error!("unsupported target OS");
1553+
// This returns an error if reading any file or directory within the provided
1554+
// directory fails.
1555+
async fn dir_size(path: &Utf8PathBuf) -> Result<u64, BundleError> {
1556+
let path = path.clone();
1557+
// We could, alternatively, just implement this using `tokio::fs` to read
1558+
// the directory and so on. However, the `tokio::fs` APIs are basically just
1559+
// a wrapper around `spawn_blocking`-ing code that does the `std::fs`
1560+
// functions. So, putting the whole thing in one big `spawn_blocking`
1561+
// closure that just does all the blocking fs ops lets us spend less time
1562+
// creating and destroying tiny blocking tasks.
1563+
tokio::task::spawn_blocking(move || {
1564+
let mut sum = 0;
1565+
for entry in walkdir::WalkDir::new(&path).into_iter() {
1566+
let entry = entry?;
1567+
sum += entry.metadata()?.len()
15741568
}
1575-
}
1576-
const DU: &str = "/usr/bin/du";
1577-
let args = &[DU_ARG, "-s", path.as_str()];
1578-
let output = Command::new(DU).args(args).output().await.map_err(|err| {
1579-
BundleError::Command { cmd: format!("{DU} {}", args.join(" ")), err }
1580-
})?;
1581-
let err = |msg: &str| {
1582-
BundleError::Cleanup(anyhow!(
1583-
"failed to fetch disk usage for {}: {}",
1584-
path,
1585-
msg,
1586-
))
1587-
};
1588-
if !output.status.success() {
1589-
return Err(err("du command failed"));
1590-
}
1591-
let Ok(s) = std::str::from_utf8(&output.stdout) else {
1592-
return Err(err("non-UTF8 stdout"));
1593-
};
1594-
let Some(line) = s.lines().next() else {
1595-
return Err(err("no lines in du output"));
1596-
};
1597-
let Some(part) = line.trim().split_ascii_whitespace().next() else {
1598-
return Err(err("no disk usage size computed in output"));
1599-
};
1600-
part.parse()
1601-
.map(|x: u64| x.saturating_mul(BLOCK_SIZE))
1602-
.map_err(|_| err("failed to parse du output"))
1569+
1570+
Ok(sum)
1571+
})
1572+
.await
1573+
.expect("spawned blocking task should not be cancelled or panic")
16031574
}
16041575

16051576
// Return the quota for a ZFS dataset, or the available size.
@@ -1659,22 +1630,86 @@ async fn zfs_quota(path: &Utf8PathBuf) -> Result<u64, BundleError> {
16591630

16601631
#[cfg(test)]
16611632
mod tests {
1662-
use super::disk_usage;
1663-
use super::Utf8PathBuf;
1633+
use super::*;
16641634

16651635
#[tokio::test]
1666-
async fn test_disk_usage() {
1636+
async fn test_dir_size() {
16671637
let path =
16681638
Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
1669-
let usage = disk_usage(&path).await.unwrap();
1670-
// Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
1671-
// directory is ~450 KiB.
1639+
let usage = dir_size(&path).await.unwrap();
16721640
assert!(
16731641
usage >= 1024 * 400,
16741642
"sled-agent manifest directory disk usage not correct?"
16751643
);
16761644
let path = Utf8PathBuf::from("/some/nonexistent/path");
1677-
assert!(disk_usage(&path).await.is_err());
1645+
assert!(dir_size(&path).await.is_err());
1646+
}
1647+
1648+
#[tokio::test]
1649+
// Different operating systems ship slightly different versions of `du(1)`,
1650+
// with differing behaviors. We really only care that the `dir_size`
1651+
// function behaves the same as the illumos `du(1)`, so skip this test on
1652+
// other systems.
1653+
#[cfg_attr(not(target_os = "illumos"), ignore)]
1654+
async fn test_dir_size_matches_du() {
1655+
const DU: &str = "du";
1656+
async fn dir_size_du(path: &Utf8PathBuf) -> Result<u64, BundleError> {
1657+
let args = &["-A", "-s", path.as_str()];
1658+
let output =
1659+
Command::new(DU).args(args).output().await.map_err(|err| {
1660+
BundleError::Command {
1661+
cmd: format!("{DU} {}", args.join(" ")),
1662+
err,
1663+
}
1664+
})?;
1665+
let err = |msg: &str| {
1666+
BundleError::Cleanup(anyhow!(
1667+
"failed to fetch disk usage for {}: {}",
1668+
path,
1669+
msg,
1670+
))
1671+
};
1672+
if !output.status.success() {
1673+
return Err(err("du command failed"));
1674+
}
1675+
let Ok(s) = std::str::from_utf8(&output.stdout) else {
1676+
return Err(err("non-UTF8 stdout"));
1677+
};
1678+
let Some(line) = s.lines().next() else {
1679+
return Err(err("no lines in du output"));
1680+
};
1681+
let Some(part) = line.trim().split_ascii_whitespace().next() else {
1682+
return Err(err("no disk usage size computed in output"));
1683+
};
1684+
part.parse().map_err(|_| err("failed to parse du output"))
1685+
}
1686+
1687+
let du_output =
1688+
Command::new(DU).arg("--version").output().await.unwrap();
1689+
eprintln!(
1690+
"du --version:\n{}\n",
1691+
String::from_utf8_lossy(&du_output.stdout)
1692+
);
1693+
1694+
let path =
1695+
Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
1696+
let t0 = std::time::Instant::now();
1697+
let usage =
1698+
dbg!(dir_size(&path).await).expect("disk usage for src dir failed");
1699+
eprintln!("dir_size({path}) took {:?}", t0.elapsed());
1700+
1701+
let t0 = std::time::Instant::now();
1702+
// Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
1703+
// directory is ~450 KiB.
1704+
let du_usage =
1705+
dbg!(dir_size_du(&path).await).expect("running du failed!");
1706+
eprintln!("du -s {path} took {:?}", t0.elapsed());
1707+
1708+
assert_eq!(
1709+
usage, du_usage,
1710+
"expected `dir_size({path})` to == `du -s {path}`\n\
1711+
{usage}B (`dir_size`) != {du_usage}B (`du`)",
1712+
);
16781713
}
16791714
}
16801715

0 commit comments

Comments
 (0)