@@ -582,6 +582,11 @@ pub enum BundleError {
582582
583583 #[ error( "Instance is terminating" ) ]
584584 InstanceTerminating ,
585+
586+ /// The `walkdir` crate's errors already include the path which could not be
587+ /// read (if one exists), so we can just wrap them directly.
588+ #[ error( transparent) ]
589+ WalkDir ( #[ from] walkdir:: Error ) ,
585590}
586591
587592// Helper function to write an array of bytes into the tar archive, with
@@ -1533,7 +1538,7 @@ async fn compute_bundle_utilization(
15331538 // TODO-correctness: This takes into account the directories themselves,
15341539 // and may be not quite what we want. But it is very easy and pretty
15351540 // close.
1536- let bytes_used = disk_usage ( dir) . await ?;
1541+ let bytes_used = dir_size ( dir) . await ?;
15371542 debug ! ( log, "computed bytes used" ; "bytes_used" => bytes_used) ;
15381543 out. insert (
15391544 dir. clone ( ) ,
@@ -1545,61 +1550,27 @@ async fn compute_bundle_utilization(
15451550
15461551// Return the number of bytes occupied by the provided directory.
15471552//
1548- // This returns an error if:
1549- //
1550- // - The "du" command fails
1551- // - Parsing stdout fails
1552- // - Parsing the actual size as a u64 fails
1553- async fn disk_usage ( path : & Utf8PathBuf ) -> Result < u64 , BundleError > {
1554- // Each OS implements slightly different `du` options.
1555- //
1556- // Linux and illumos support the "apparent" size in bytes, though using
1557- // different options. macOS doesn't support bytes at all, and has a minimum
1558- // block size of 512.
1559- //
1560- // We'll suffer the lower resolution on macOS, and get higher resolution on
1561- // the others.
1562- cfg_if:: cfg_if! {
1563- if #[ cfg( target_os = "illumos" ) ] {
1564- const BLOCK_SIZE : u64 = 1 ;
1565- const DU_ARG : & str = "-A" ;
1566- } else if #[ cfg( target_os = "linux" ) ] {
1567- const BLOCK_SIZE : u64 = 1 ;
1568- const DU_ARG : & str = "-b" ;
1569- } else if #[ cfg( target_os = "macos" ) ] {
1570- const BLOCK_SIZE : u64 = 512 ;
1571- const DU_ARG : & str = "-k" ;
1572- } else {
1573- compile_error!( "unsupported target OS" ) ;
1553+ // This returns an error if reading any file or directory within the provided
1554+ // directory fails.
1555+ async fn dir_size ( path : & Utf8PathBuf ) -> Result < u64 , BundleError > {
1556+ let path = path. clone ( ) ;
1557+ // We could, alternatively, just implement this using `tokio::fs` to read
1558+ // the directory and so on. However, the `tokio::fs` APIs are basically just
1559+ // a wrapper around `spawn_blocking`-ing code that does the `std::fs`
1560+ // functions. So, putting the whole thing in one big `spawn_blocking`
1561+ // closure that just does all the blocking fs ops lets us spend less time
1562+ // creating and destroying tiny blocking tasks.
1563+ tokio:: task:: spawn_blocking ( move || {
1564+ let mut sum = 0 ;
1565+ for entry in walkdir:: WalkDir :: new ( & path) . into_iter ( ) {
1566+ let entry = entry?;
1567+ sum += entry. metadata ( ) ?. len ( )
15741568 }
1575- }
1576- const DU : & str = "/usr/bin/du" ;
1577- let args = & [ DU_ARG , "-s" , path. as_str ( ) ] ;
1578- let output = Command :: new ( DU ) . args ( args) . output ( ) . await . map_err ( |err| {
1579- BundleError :: Command { cmd : format ! ( "{DU} {}" , args. join( " " ) ) , err }
1580- } ) ?;
1581- let err = |msg : & str | {
1582- BundleError :: Cleanup ( anyhow ! (
1583- "failed to fetch disk usage for {}: {}" ,
1584- path,
1585- msg,
1586- ) )
1587- } ;
1588- if !output. status . success ( ) {
1589- return Err ( err ( "du command failed" ) ) ;
1590- }
1591- let Ok ( s) = std:: str:: from_utf8 ( & output. stdout ) else {
1592- return Err ( err ( "non-UTF8 stdout" ) ) ;
1593- } ;
1594- let Some ( line) = s. lines ( ) . next ( ) else {
1595- return Err ( err ( "no lines in du output" ) ) ;
1596- } ;
1597- let Some ( part) = line. trim ( ) . split_ascii_whitespace ( ) . next ( ) else {
1598- return Err ( err ( "no disk usage size computed in output" ) ) ;
1599- } ;
1600- part. parse ( )
1601- . map ( |x : u64 | x. saturating_mul ( BLOCK_SIZE ) )
1602- . map_err ( |_| err ( "failed to parse du output" ) )
1569+
1570+ Ok ( sum)
1571+ } )
1572+ . await
1573+ . expect ( "spawned blocking task should not be cancelled or panic" )
16031574}
16041575
16051576// Return the quota for a ZFS dataset, or the available size.
@@ -1659,22 +1630,86 @@ async fn zfs_quota(path: &Utf8PathBuf) -> Result<u64, BundleError> {
16591630
16601631#[ cfg( test) ]
16611632mod tests {
1662- use super :: disk_usage;
1663- use super :: Utf8PathBuf ;
1633+ use super :: * ;
16641634
16651635 #[ tokio:: test]
1666- async fn test_disk_usage ( ) {
1636+ async fn test_dir_size ( ) {
16671637 let path =
16681638 Utf8PathBuf :: from ( concat ! ( env!( "CARGO_MANIFEST_DIR" ) , "/src" ) ) ;
1669- let usage = disk_usage ( & path) . await . unwrap ( ) ;
1670- // Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
1671- // directory is ~450 KiB.
1639+ let usage = dir_size ( & path) . await . unwrap ( ) ;
16721640 assert ! (
16731641 usage >= 1024 * 400 ,
16741642 "sled-agent manifest directory disk usage not correct?"
16751643 ) ;
16761644 let path = Utf8PathBuf :: from ( "/some/nonexistent/path" ) ;
1677- assert ! ( disk_usage( & path) . await . is_err( ) ) ;
1645+ assert ! ( dir_size( & path) . await . is_err( ) ) ;
1646+ }
1647+
1648+ #[ tokio:: test]
1649+ // Different operating systems ship slightly different versions of `du(1)`,
1650+ // with differing behaviors. We really only care that the `dir_size`
1651+ // function behaves the same as the illumos `du(1)`, so skip this test on
1652+ // other systems.
1653+ #[ cfg_attr( not( target_os = "illumos" ) , ignore) ]
1654+ async fn test_dir_size_matches_du ( ) {
1655+ const DU : & str = "du" ;
1656+ async fn dir_size_du ( path : & Utf8PathBuf ) -> Result < u64 , BundleError > {
1657+ let args = & [ "-A" , "-s" , path. as_str ( ) ] ;
1658+ let output =
1659+ Command :: new ( DU ) . args ( args) . output ( ) . await . map_err ( |err| {
1660+ BundleError :: Command {
1661+ cmd : format ! ( "{DU} {}" , args. join( " " ) ) ,
1662+ err,
1663+ }
1664+ } ) ?;
1665+ let err = |msg : & str | {
1666+ BundleError :: Cleanup ( anyhow ! (
1667+ "failed to fetch disk usage for {}: {}" ,
1668+ path,
1669+ msg,
1670+ ) )
1671+ } ;
1672+ if !output. status . success ( ) {
1673+ return Err ( err ( "du command failed" ) ) ;
1674+ }
1675+ let Ok ( s) = std:: str:: from_utf8 ( & output. stdout ) else {
1676+ return Err ( err ( "non-UTF8 stdout" ) ) ;
1677+ } ;
1678+ let Some ( line) = s. lines ( ) . next ( ) else {
1679+ return Err ( err ( "no lines in du output" ) ) ;
1680+ } ;
1681+ let Some ( part) = line. trim ( ) . split_ascii_whitespace ( ) . next ( ) else {
1682+ return Err ( err ( "no disk usage size computed in output" ) ) ;
1683+ } ;
1684+ part. parse ( ) . map_err ( |_| err ( "failed to parse du output" ) )
1685+ }
1686+
1687+ let du_output =
1688+ Command :: new ( DU ) . arg ( "--version" ) . output ( ) . await . unwrap ( ) ;
1689+ eprintln ! (
1690+ "du --version:\n {}\n " ,
1691+ String :: from_utf8_lossy( & du_output. stdout)
1692+ ) ;
1693+
1694+ let path =
1695+ Utf8PathBuf :: from ( concat ! ( env!( "CARGO_MANIFEST_DIR" ) , "/src" ) ) ;
1696+ let t0 = std:: time:: Instant :: now ( ) ;
1697+ let usage =
1698+ dbg ! ( dir_size( & path) . await ) . expect ( "disk usage for src dir failed" ) ;
1699+ eprintln ! ( "dir_size({path}) took {:?}" , t0. elapsed( ) ) ;
1700+
1701+ let t0 = std:: time:: Instant :: now ( ) ;
1702+ // Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
1703+ // directory is ~450 KiB.
1704+ let du_usage =
1705+ dbg ! ( dir_size_du( & path) . await ) . expect ( "running du failed!" ) ;
1706+ eprintln ! ( "du -s {path} took {:?}" , t0. elapsed( ) ) ;
1707+
1708+ assert_eq ! (
1709+ usage, du_usage,
1710+ "expected `dir_size({path})` to == `du -s {path}`\n \
1711+ {usage}B (`dir_size`) != {du_usage}B (`du`)",
1712+ ) ;
16781713 }
16791714}
16801715
0 commit comments