From 9c19ebc99f77c31247b297d3b030943d83f96922 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Mon, 12 May 2025 15:43:01 -0700
Subject: [PATCH 1/5] Make dladm commands async

---
 illumos-utils/src/dladm.rs             | 155 ++++++++++++++-----------
 illumos-utils/src/fakes/dladm.rs       |  10 +-
 illumos-utils/src/lib.rs               |  19 ++-
 illumos-utils/src/link.rs              |  16 +--
 illumos-utils/src/running_zone.rs      |  17 +--
 installinator/src/bootstrap.rs         |  10 +-
 sled-agent/src/bootstrap/pre_server.rs |  55 +++++----
 sled-agent/src/config.rs               |   5 +-
 sled-agent/src/probe_manager.rs        |   2 +-
 sled-agent/src/services.rs             |  21 ++--
 sled-agent/src/sled_agent.rs           |   6 +-
 sled-hardware/src/cleanup.rs           |  34 +++---
 sled-hardware/src/underlay.rs          |  13 ++-
 sled-hardware/types/src/underlay.rs    |   4 +-
 14 files changed, 215 insertions(+), 152 deletions(-)

diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs
index 226d1be309f..5645b6aa5c9 100644
--- a/illumos-utils/src/dladm.rs
+++ b/illumos-utils/src/dladm.rs
@@ -6,12 +6,13 @@
 use crate::link::{Link, LinkKind};
 use crate::zone::IPADM;
-use crate::{ExecutionError, PFEXEC, execute};
+use crate::{ExecutionError, PFEXEC, execute_async};
 use omicron_common::api::external::MacAddr;
 use omicron_common::vlan::VlanID;
 use serde::{Deserialize, Serialize};
 use std::str::FromStr;
 use std::str::Utf8Error;
+use tokio::process::Command;
 
 pub const VNIC_PREFIX: &str = "ox";
 pub const VNIC_PREFIX_CONTROL: &str = "oxControl";
@@ -153,7 +154,7 @@ pub struct Etherstub(pub String);
 pub struct EtherstubVnic(pub String);
 
 /// Identifies that an object may be used to create a VNIC.
-pub trait VnicSource {
+pub trait VnicSource: Send + Sync {
     fn name(&self) -> &str;
 }
 
@@ -175,6 +176,7 @@ pub struct Dladm(());
 /// Describes the API for interfacing with Data links.
 ///
 /// This is a trait so that it can be faked out for tests.
+#[async_trait::async_trait]
 pub trait Api: Send + Sync {
     /// Creates a new VNIC atop a physical device.
     ///
     /// * `physical`: The physical link on top of which a device will be created.
@@ -183,7 +185,7 @@ pub trait Api: Send + Sync {
     /// * `vnic_name`: Exact name of the VNIC to be created.
     /// * `mac`: An optional unicast MAC address for the newly created NIC.
     /// * `vlan`: An optional VLAN ID for VLAN tagging.
-    fn create_vnic(
+    async fn create_vnic(
         &self,
         source: &(dyn VnicSource + 'static),
         vnic_name: &str,
@@ -191,7 +193,7 @@ pub trait Api: Send + Sync {
         vlan: Option<VlanID>,
         mtu: usize,
     ) -> Result<(), CreateVnicError> {
-        let mut command = std::process::Command::new(PFEXEC);
+        let mut command = Command::new(PFEXEC);
         let mut args = vec![
             DLADM.to_string(),
             "create-vnic".to_string(),
@@ -216,7 +218,7 @@ pub trait Api: Send + Sync {
         args.push(vnic_name.to_string());
 
         let cmd = command.args(&args);
-        execute(cmd).map_err(|err| CreateVnicError {
+        execute_async(cmd).await.map_err(|err| CreateVnicError {
             name: vnic_name.to_string(),
             link: source.name().to_string(),
             err,
@@ -226,7 +228,7 @@ pub trait Api: Send + Sync {
         // the mtu to N. Set it here using `set-linkprop`.
         //
         // See https://www.illumos.org/issues/15695 for the illumos bug.
-        let mut command = std::process::Command::new(PFEXEC);
+        let mut command = Command::new(PFEXEC);
         let prop = format!("mtu={}", mtu);
         let cmd = command.args(&[
             DLADM,
@@ -236,7 +238,7 @@ pub trait Api: Send + Sync {
             &prop,
             vnic_name,
         ]);
-        execute(cmd).map_err(|err| CreateVnicError {
+        execute_async(cmd).await.map_err(|err| CreateVnicError {
             name: vnic_name.to_string(),
             link: source.name().to_string(),
             err,
@@ -246,10 +248,13 @@ pub trait Api: Send + Sync {
     }
 
     /// Verify that the given link exists
-    fn verify_link(&self, link: &str) -> Result<Link, ExecutionError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    async fn verify_link(
+        &self,
+        link: &str,
+    ) -> Result<Link, ExecutionError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "show-link", "-p", "-o", "LINK", link]);
-        let output = execute(cmd)?;
+        let output = execute_async(cmd).await?;
         match String::from_utf8_lossy(&output.stdout)
             .lines()
             .next()
@@ -261,10 +266,11 @@ pub trait Api: Send + Sync {
     }
 
     /// Remove a vnic from the sled.
-    fn delete_vnic(&self, name: &str) -> Result<(), DeleteVnicError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    async fn delete_vnic(&self, name: &str) -> Result<(), DeleteVnicError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "delete-vnic", name]);
-        execute(cmd)
+        execute_async(cmd)
+            .await
             .map_err(|err| DeleteVnicError { name: name.to_string(), err })?;
         Ok(())
     }
@@ -282,21 +288,23 @@ impl Dladm {
     }
 
     /// Creates an etherstub, or returns one which already exists.
-    pub fn ensure_etherstub(name: &str) -> Result<Etherstub, ExecutionError> {
-        if let Ok(stub) = Self::get_etherstub(name) {
+    pub async fn ensure_etherstub(
+        name: &str,
+    ) -> Result<Etherstub, ExecutionError> {
+        if let Ok(stub) = Self::get_etherstub(name).await {
             return Ok(stub);
         }
-        let mut command = std::process::Command::new(PFEXEC);
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "create-etherstub", "-t", name]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(Etherstub(name.to_string()))
     }
 
     /// Finds an etherstub.
-    fn get_etherstub(name: &str) -> Result<Etherstub, ExecutionError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    async fn get_etherstub(name: &str) -> Result<Etherstub, ExecutionError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "show-etherstub", name]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(Etherstub(name.to_string()))
     }
 
@@ -304,7 +312,7 @@ impl Dladm {
     ///
     /// This VNIC is not tracked like [`crate::link::Link`], because
     /// it is expected to exist for the lifetime of the sled.
-    pub fn ensure_etherstub_vnic(
+    pub async fn ensure_etherstub_vnic(
         source: &Etherstub,
     ) -> Result<EtherstubVnic, CreateVnicError> {
         let (vnic_name, mtu) = match source.0.as_str() {
@@ -312,65 +320,73 @@ impl Dladm {
             BOOTSTRAP_ETHERSTUB_NAME => (BOOTSTRAP_ETHERSTUB_VNIC_NAME, 1500),
             _ => unreachable!(),
         };
-        if let Ok(vnic) = Self::get_etherstub_vnic(vnic_name) {
+        if let Ok(vnic) = Self::get_etherstub_vnic(vnic_name).await {
             return Ok(vnic);
         }
-        Self::real_api().create_vnic(source, vnic_name, None, None, mtu)?;
+        Self::real_api()
+            .create_vnic(source, vnic_name, None, None, mtu)
+            .await?;
         Ok(EtherstubVnic(vnic_name.to_string()))
     }
 
-    fn get_etherstub_vnic(name: &str) -> Result<EtherstubVnic, ExecutionError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    async fn get_etherstub_vnic(
+        name: &str,
+    ) -> Result<EtherstubVnic, ExecutionError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "show-vnic", name]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(EtherstubVnic(name.to_string()))
     }
 
     // Return the name of the IP interface over the etherstub VNIC, if it
     // exists.
-    fn get_etherstub_vnic_interface(
+    async fn get_etherstub_vnic_interface(
         name: &str,
     ) -> Result<String, ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[IPADM, "show-if", "-p", "-o", "IFNAME", name]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(name.to_string())
     }
 
     /// Delete the VNIC over the inter-zone comms etherstub.
-    pub fn delete_etherstub_vnic(name: &str) -> Result<(), ExecutionError> {
+    pub async fn delete_etherstub_vnic(
+        name: &str,
+    ) -> Result<(), ExecutionError> {
         // It's not clear why, but this requires deleting the _interface_ that's
         // over the VNIC first. Other VNICs don't require this for some reason.
-        if Self::get_etherstub_vnic_interface(name).is_ok() {
-            let mut cmd = std::process::Command::new(PFEXEC);
+        if Self::get_etherstub_vnic_interface(name).await.is_ok() {
+            let mut cmd = Command::new(PFEXEC);
             let cmd = cmd.args(&[IPADM, "delete-if", name]);
-            execute(cmd)?;
+            execute_async(cmd).await?;
         }
-        if Self::get_etherstub_vnic(name).is_ok() {
-            let mut cmd = std::process::Command::new(PFEXEC);
+        if Self::get_etherstub_vnic(name).await.is_ok() {
+            let mut cmd = Command::new(PFEXEC);
             let cmd = cmd.args(&[DLADM, "delete-vnic", name]);
-            execute(cmd)?;
+            execute_async(cmd).await?;
         }
         Ok(())
     }
 
     /// Delete the inter-zone comms etherstub.
-    pub fn delete_etherstub(name: &str) -> Result<(), ExecutionError> {
-        if Self::get_etherstub(name).is_ok() {
-            let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn delete_etherstub(name: &str) -> Result<(), ExecutionError> {
+        if Self::get_etherstub(name).await.is_ok() {
+            let mut cmd = Command::new(PFEXEC);
             let cmd = cmd.args(&[DLADM, "delete-etherstub", name]);
-            execute(cmd)?;
+            execute_async(cmd).await?;
         }
         Ok(())
     }
 
     /// Returns the name of the first observed physical data link.
-    pub fn find_physical() -> Result<PhysicalLink, FindPhysicalLinkError> {
+    pub async fn find_physical() -> Result<PhysicalLink, FindPhysicalLinkError>
+    {
         // TODO: This is arbitrary, but we're currently grabbing the first
         // physical device. Should we have a more sophisticated method for
         // selection?
-        Self::list_physical()?
+        Self::list_physical()
+            .await?
             .into_iter()
             .next()
             .ok_or_else(|| FindPhysicalLinkError::NoPhysicalLinkFound)
@@ -379,10 +395,11 @@ impl Dladm {
     /// List the extant physical data links on the system.
     ///
     /// Note that this returns _all_ links.
-    pub fn list_physical() -> Result<Vec<PhysicalLink>, FindPhysicalLinkError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    pub async fn list_physical()
+    -> Result<Vec<PhysicalLink>, FindPhysicalLinkError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "show-phys", "-p", "-o", "LINK"]);
-        let output = execute(cmd)?;
+        let output = execute_async(cmd).await?;
         std::str::from_utf8(&output.stdout)
             .map_err(FindPhysicalLinkError::NonUtf8Output)
             .map(|stdout| {
@@ -394,8 +411,8 @@ impl Dladm {
     }
 
     /// Returns the MAC address of a physical link.
-    pub fn get_mac(link: &PhysicalLink) -> Result<MacAddr, GetMacError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    pub async fn get_mac(link: &PhysicalLink) -> Result<MacAddr, GetMacError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[
             DLADM,
             "show-phys",
@@ -405,7 +422,7 @@ impl Dladm {
             "ADDRESS",
             &link.0,
         ]);
-        let output = execute(cmd)?;
+        let output = execute_async(cmd).await?;
         let name = String::from_utf8_lossy(&output.stdout)
             .lines()
             .next()
@@ -425,10 +442,11 @@ impl Dladm {
     }
 
     /// Returns VNICs that may be managed by the Sled Agent.
-    pub fn get_vnics() -> Result<Vec<String>, GetVnicError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    pub async fn get_vnics() -> Result<Vec<String>, GetVnicError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]);
-        let output = execute(cmd).map_err(|err| GetVnicError { err })?;
+        let output =
+            execute_async(cmd).await.map_err(|err| GetVnicError { err })?;
 
         let vnics = String::from_utf8_lossy(&output.stdout)
             .lines()
@@ -445,10 +463,12 @@ impl Dladm {
     }
 
     /// Returns simnet links masquerading as tfport devices
-    pub fn get_simulated_tfports() -> Result<Vec<String>, GetSimnetError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    pub async fn get_simulated_tfports() -> Result<Vec<String>, GetSimnetError>
+    {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[DLADM, "show-simnet", "-p", "-o", "LINK"]);
-        let output = execute(cmd).map_err(|err| GetSimnetError { err })?;
+        let output =
+            execute_async(cmd).await.map_err(|err| GetSimnetError { err })?;
 
         let tfports = String::from_utf8_lossy(&output.stdout)
             .lines()
@@ -464,11 +484,11 @@ impl Dladm {
     }
 
     /// Get a link property value on a VNIC
-    pub fn get_linkprop(
+    pub async fn get_linkprop(
         vnic: &str,
         prop_name: &str,
     ) -> Result<String, GetLinkpropError> {
-        let mut command = std::process::Command::new(PFEXEC);
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[
             DLADM,
             "show-linkprop",
@@ -479,24 +499,25 @@ impl Dladm {
             prop_name,
             vnic,
         ]);
-        let result = execute(cmd).map_err(|err| GetLinkpropError {
-            link_name: vnic.to_string(),
-            prop_name: prop_name.to_string(),
-            err,
-        })?;
+        let result =
+            execute_async(cmd).await.map_err(|err| GetLinkpropError {
+                link_name: vnic.to_string(),
+                prop_name: prop_name.to_string(),
+                err,
+            })?;
         Ok(String::from_utf8_lossy(&result.stdout).into_owned())
     }
 
     /// Set a link property on a VNIC
-    pub fn set_linkprop(
+    pub async fn set_linkprop(
         vnic: &str,
         prop_name: &str,
         prop_value: &str,
     ) -> Result<(), SetLinkpropError> {
-        let mut command = std::process::Command::new(PFEXEC);
+        let mut command = Command::new(PFEXEC);
         let prop = format!("{}={}", prop_name, prop_value);
         let cmd =
             command.args(&[DLADM, "set-linkprop", "-t", "-p", &prop, vnic]);
-        execute(cmd).map_err(|err| SetLinkpropError {
+        execute_async(cmd).await.map_err(|err| SetLinkpropError {
             link_name: vnic.to_string(),
             prop_name: prop_name.to_string(),
             prop_value: prop_value.to_string(),
@@ -506,11 +527,11 @@ impl Dladm {
     }
 
     /// Reset a link property on a VNIC
-    pub fn reset_linkprop(
+    pub async fn reset_linkprop(
         vnic: &str,
         prop_name: &str,
     ) -> Result<(), ResetLinkpropError> {
-        let mut command = std::process::Command::new(PFEXEC);
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[
             DLADM,
             "reset-linkprop",
@@ -519,7 +540,7 @@ impl Dladm {
             prop_name,
             vnic,
         ]);
-        execute(cmd).map_err(|err| ResetLinkpropError {
+        execute_async(cmd).await.map_err(|err| ResetLinkpropError {
             link_name: vnic.to_string(),
             prop_name: prop_name.to_string(),
             err,
diff --git a/illumos-utils/src/fakes/dladm.rs b/illumos-utils/src/fakes/dladm.rs
index c70b2df842f..a131b7e0815 100644
--- a/illumos-utils/src/fakes/dladm.rs
+++ b/illumos-utils/src/fakes/dladm.rs
@@ -24,8 +24,9 @@ impl Dladm {
     }
 }
 
+#[async_trait::async_trait]
 impl Api for Dladm {
-    fn create_vnic(
+    async fn create_vnic(
         &self,
         _source: &(dyn VnicSource + 'static),
         _vnic_name: &str,
@@ -36,11 +37,14 @@ impl Api for Dladm {
         Ok(())
     }
 
-    fn verify_link(&self, link: &str) -> Result<Link, ExecutionError> {
+    async fn verify_link(
+        &self,
+        link: &str,
+    ) -> Result<Link, ExecutionError> {
         Ok(Link::wrap_physical(link))
     }
 
-    fn delete_vnic(&self, _name: &str) -> Result<(), DeleteVnicError> {
+    async fn delete_vnic(&self, _name: &str) -> Result<(), DeleteVnicError> {
         Ok(())
     }
 }
diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs
index 811a17dbe1a..ce1404f09a3 100644
--- a/illumos-utils/src/lib.rs
+++ b/illumos-utils/src/lib.rs
@@ -87,7 +87,7 @@ impl From<ExecutionError> for HttpError {
     }
 }
 
-fn command_to_string(command: &mut std::process::Command) -> String {
+fn command_to_string(command: &std::process::Command) -> String {
     command
         .get_args()
         .map(|s| s.to_string_lossy().into())
@@ -128,3 +128,20 @@ pub fn execute(
 
     Ok(output)
 }
+
+pub async fn execute_async(
+    command: &mut tokio::process::Command,
+) -> Result<std::process::Output, ExecutionError> {
+    let output = command.output().await.map_err(|err| {
+        ExecutionError::ExecutionStart {
+            command: command_to_string(command.as_std()),
+            err,
+        }
+    })?;
+
+    if !output.status.success() {
+        return Err(output_to_exec_error(command.as_std(), &output));
+    }
+
+    Ok(output)
+}
diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs
index 0c7c5ff7c00..aea9e33c8df 100644
--- a/illumos-utils/src/link.rs
+++ b/illumos-utils/src/link.rs
@@ -56,7 +56,7 @@ impl<DL: VnicSource + 'static> VnicAllocator<DL> {
     /// Creates a new NIC, intended for allowing Propolis to communicate
     /// with the control plane.
-    pub fn new_control(
+    pub async fn new_control(
         &self,
         mac: Option<MacAddr>,
     ) -> Result<Link, CreateVnicError> {
@@ -64,7 +64,7 @@ impl<DL: VnicSource + 'static> VnicAllocator<DL> {
         let name = allocator.next();
         debug_assert!(name.starts_with(VNIC_PREFIX));
         debug_assert!(name.starts_with(VNIC_PREFIX_CONTROL));
-        self.dladm.create_vnic(&self.data_link, &name, mac, None, 9000)?;
+        self.dladm.create_vnic(&self.data_link, &name, mac, None, 9000).await?;
         Ok(Link {
             name,
             deleted: false,
@@ -101,9 +101,11 @@ impl<DL: VnicSource + 'static> VnicAllocator<DL> {
         }
     }
 
-    pub fn new_bootstrap(&self) -> Result<Link, CreateVnicError> {
+    pub async fn new_bootstrap(&self) -> Result<Link, CreateVnicError> {
         let name = self.next();
-        self.dladm.create_vnic(&self.data_link, &name, None, None, 1500)?;
+        self.dladm
+            .create_vnic(&self.data_link, &name, None, None, 1500)
+            .await?;
         Ok(Link {
             name,
             deleted: false,
@@ -198,11 +200,11 @@ impl Link {
     }
 
     /// Deletes a NIC (if it has not already been deleted).
-    pub fn delete(&mut self) -> Result<(), DeleteVnicError> {
+    pub async fn delete(&mut self) -> Result<(), DeleteVnicError> {
         if self.deleted || self.kind == LinkKind::Physical {
             Ok(())
         } else {
-            self.api.as_ref().unwrap().delete_vnic(&self.name)?;
+            self.api.as_ref().unwrap().delete_vnic(&self.name).await?;
             self.deleted = true;
             Ok(())
         }
@@ -243,7 +245,7 @@ struct VnicDestruction {
 impl Deletable for VnicDestruction {
     async fn delete(&self) -> Result<(), anyhow::Error> {
         if let Some(api) = self.api.as_ref() {
-            api.delete_vnic(&self.name)?;
+            api.delete_vnic(&self.name).await?;
         }
         Ok(())
     }
diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs
index a90a7350940..86d9acadaea 100644
--- a/illumos-utils/src/running_zone.rs
+++ b/illumos-utils/src/running_zone.rs
@@ -1186,7 +1186,7 @@ impl<'a> ZoneBuilder<'a> {
     }
 
     // (used in unit tests)
-    fn fake_install(mut self) -> Result<InstalledZone, InstallZoneError> {
+    async fn fake_install(mut self) -> Result<InstalledZone, InstallZoneError> {
         let zones_api = self.zones_api.take().unwrap();
         let zone = self
             .zone_type
@@ -1196,6 +1196,7 @@ impl<'a> ZoneBuilder<'a> {
             .underlay_vnic_allocator
             .ok_or(InstallZoneError::IncompleteBuilder)?
             .new_control(None)
+            .await
             .map_err(move |err| InstallZoneError::CreateVnic { zone, err })?;
         let fake_cfg = self.fake_cfg.unwrap();
         let temp_dir = fake_cfg.temp_dir;
@@ -1233,7 +1234,7 @@ impl<'a> ZoneBuilder<'a> {
     /// parameter was not provided.
     pub async fn install(mut self) -> Result<InstalledZone, InstallZoneError> {
         if self.fake_cfg.is_some() {
-            return self.fake_install();
+            return self.fake_install().await;
         }
 
         let Self {
@@ -1258,12 +1259,12 @@ impl<'a> ZoneBuilder<'a> {
             return Err(InstallZoneError::IncompleteBuilder);
         };
 
-        let control_vnic =
-            underlay_vnic_allocator.new_control(None).map_err(|err| {
-                InstallZoneError::CreateVnic {
-                    zone: zone_type.to_string(),
-                    err,
-                }
+        let control_vnic = underlay_vnic_allocator
+            .new_control(None)
+            .await
+            .map_err(|err| InstallZoneError::CreateVnic {
+                zone: zone_type.to_string(),
+                err,
             })?;
 
         let full_zone_name =
diff --git a/installinator/src/bootstrap.rs b/installinator/src/bootstrap.rs
index 734c5c8289f..b9e58fbcdac 100644
--- a/installinator/src/bootstrap.rs
+++ b/installinator/src/bootstrap.rs
@@ -36,6 +36,7 @@ pub(crate) async fn bootstrap_sled(
 ) -> Result<()> {
     // Find address objects to pass to maghemite.
     let links = underlay::find_chelsio_links(data_links)
+        .await
         .context("failed to find chelsio links")?;
     ensure!(
         !links.is_empty(),
@@ -59,17 +60,18 @@ pub(crate) async fn bootstrap_sled(
     // Set up an interface for our bootstrap network.
     let bootstrap_etherstub =
         Dladm::ensure_etherstub(dladm::BOOTSTRAP_ETHERSTUB_NAME)
+            .await
             .context("failed to ensure bootstrap etherstub existence")?;
 
     let bootstrap_etherstub_vnic =
         Dladm::ensure_etherstub_vnic(&bootstrap_etherstub)
+            .await
             .context("failed to ensure bootstrap etherstub vnic existence")?;
 
     // Use the mac address of the first link to derive our bootstrap address.
-    let ip =
-        BootstrapInterface::GlobalZone.ip(&links[0]).with_context(|| {
-            format!("failed to derive a bootstrap prefix from {:?}", links[0])
-        })?;
+    let ip = BootstrapInterface::GlobalZone.ip(&links[0]).await.with_context(
+        || format!("failed to derive a bootstrap prefix from {:?}", links[0]),
+    )?;
 
     Zones::ensure_has_global_zone_v6_address(
         bootstrap_etherstub_vnic,
diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs
index fe456bb1f5e..7a56c187e4e 100644
--- a/sled-agent/src/bootstrap/pre_server.rs
+++ b/sled-agent/src/bootstrap/pre_server.rs
@@ -74,12 +74,13 @@ impl BootstrapAgentStartup {
         // Perform several blocking startup tasks first; we move `config` and
         // `log` into this task, and on success, it gives them back to us.
         let (config, log, startup_networking) =
-            tokio::task::spawn_blocking(move || {
-                enable_mg_ddm(&config, &log)?;
+            tokio::task::spawn_blocking(|| async move {
+                enable_mg_ddm(&config, &log).await?;
                 pumpkind::enable_pumpkind_service(&log)?;
                 ensure_zfs_key_directory_exists(&log)?;
 
-                let startup_networking = BootstrapNetworking::setup(&config)?;
+                let startup_networking =
+                    BootstrapNetworking::setup(&config).await?;
 
                 // Before we create the switch zone, we need to ensure that the
                 // necessary ZFS and Zone resources are ready. All other zones
@@ -89,7 +90,8 @@ impl BootstrapAgentStartup {
                 Ok::<_, StartError>((config, log, startup_networking))
             })
             .await
-            .unwrap()?;
+            .unwrap()
+            .await?;
 
         // Start the DDM reconciler, giving it our bootstrap subnet to
         // advertise to other sleds.
@@ -246,9 +248,13 @@ async fn cleanup_all_old_global_state(log: &Logger) -> Result<(), StartError> {
     Ok(())
 }
 
-fn enable_mg_ddm(config: &Config, log: &Logger) -> Result<(), StartError> {
+async fn enable_mg_ddm(
+    config: &Config,
+    log: &Logger,
+) -> Result<(), StartError> {
     info!(log, "finding links {:?}", config.data_links);
     let mg_addr_objs = underlay::find_nics(&config.data_links)
+        .await
         .map_err(StartError::FindMaghemiteAddrObjs)?;
     if mg_addr_objs.is_empty() {
         return Err(StartError::NoUnderlayAddrObjs);
@@ -344,21 +350,23 @@ pub(crate) struct BootstrapNetworking {
 }
 
 impl BootstrapNetworking {
-    fn setup(config: &Config) -> Result<Self, StartError> {
-        let link_for_mac = config.get_link().map_err(StartError::ConfigLink)?;
+    async fn setup(config: &Config) -> Result<Self, StartError> {
+        let link_for_mac =
+            config.get_link().await.map_err(StartError::ConfigLink)?;
         let global_zone_bootstrap_ip = BootstrapInterface::GlobalZone
             .ip(&link_for_mac)
+            .await
             .map_err(StartError::BootstrapLinkMac)?;
 
-        let bootstrap_etherstub = Dladm::ensure_etherstub(
-            dladm::BOOTSTRAP_ETHERSTUB_NAME,
-        )
-        .map_err(|err| StartError::EnsureEtherstubError {
-            name: dladm::BOOTSTRAP_ETHERSTUB_NAME,
-            err,
-        })?;
+        let bootstrap_etherstub =
+            Dladm::ensure_etherstub(dladm::BOOTSTRAP_ETHERSTUB_NAME)
+                .await
+                .map_err(|err| StartError::EnsureEtherstubError {
+                    name: dladm::BOOTSTRAP_ETHERSTUB_NAME,
+                    err,
+                })?;
         let bootstrap_etherstub_vnic =
-            Dladm::ensure_etherstub_vnic(&bootstrap_etherstub)?;
+            Dladm::ensure_etherstub_vnic(&bootstrap_etherstub).await?;
 
         Zones::ensure_has_global_zone_v6_address(
             bootstrap_etherstub_vnic.clone(),
@@ -385,17 +393,18 @@ impl BootstrapNetworking {
 
         let switch_zone_bootstrap_ip = BootstrapInterface::SwitchZone
             .ip(&link_for_mac)
+            .await
             .map_err(StartError::BootstrapLinkMac)?;
 
-        let underlay_etherstub = Dladm::ensure_etherstub(
-            dladm::UNDERLAY_ETHERSTUB_NAME,
-        )
-        .map_err(|err| StartError::EnsureEtherstubError {
-            name: dladm::UNDERLAY_ETHERSTUB_NAME,
-            err,
-        })?;
+        let underlay_etherstub =
+            Dladm::ensure_etherstub(dladm::UNDERLAY_ETHERSTUB_NAME)
+                .await
+                .map_err(|err| StartError::EnsureEtherstubError {
+                    name: dladm::UNDERLAY_ETHERSTUB_NAME,
+                    err,
+                })?;
         let underlay_etherstub_vnic =
-            Dladm::ensure_etherstub_vnic(&underlay_etherstub)?;
+            Dladm::ensure_etherstub_vnic(&underlay_etherstub).await?;
 
         Ok(Self {
             bootstrap_etherstub,
diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs
index 1ae676f8491..1c6acb1b8a5 100644
--- a/sled-agent/src/config.rs
+++ b/sled-agent/src/config.rs
@@ -154,12 +154,13 @@ impl Config {
         Ok(config)
     }
 
-    pub fn get_link(&self) -> Result<PhysicalLink, ConfigError> {
+    pub async fn get_link(&self) -> Result<PhysicalLink, ConfigError> {
         if let Some(link) = self.data_link.as_ref() {
             Ok(link.clone())
         } else {
             if is_gimlet().map_err(ConfigError::SystemDetection)? {
                 Dladm::list_physical()
+                    .await
                     .map_err(ConfigError::FindLinks)?
                     .into_iter()
                     .find(|link| link.0.starts_with(CHELSIO_LINK_PREFIX))
@@ -169,7 +170,7 @@ impl Config {
                     )
                 })
             } else {
-                Dladm::find_physical().map_err(ConfigError::FindLinks)
+                Dladm::find_physical().await.map_err(ConfigError::FindLinks)
             }
         }
     }
diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs
index 7096a15769b..75729e3a872 100644
--- a/sled-agent/src/probe_manager.rs
+++ b/sled-agent/src/probe_manager.rs
@@ -405,7 +405,7 @@ impl ProbeManagerInner {
             // TODO-correctness: There are no physical links in the zone, is
             // this intended to delete the control VNIC?
             for l in running_zone.links_mut() {
-                if let Err(e) = l.delete() {
+                if let Err(e) = l.delete().await {
                     error!(self.log, "delete probe link {}: {e}", l.name());
                 }
             }
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index 4396f77f375..3b30746da4f 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -677,7 +677,7 @@ impl<'a> ZoneArgs<'a> {
     /// supposed to be running
     pub fn switch_zone_services(
         &self,
-    ) -> Box<dyn Iterator<Item = &'a SwitchService> + 'a> {
+    ) -> Box<dyn Iterator<Item = &'a SwitchService> + Send + 'a> {
         match self {
             ZoneArgs::Omicron(_) => Box::new(std::iter::empty()),
             ZoneArgs::Switch(request) => Box::new(request.services.iter()),
@@ -1266,7 +1266,7 @@ impl ServiceManager {
     //
     // No other zone besides the switch and global zones should have a
     // bootstrap address.
-    fn bootstrap_address_needed(
+    async fn bootstrap_address_needed(
         &self,
         zone_args: &ZoneArgs<'_>,
     ) -> Result<Option<(Link, Ipv6Addr)>, Error> {
@@ -1275,6 +1275,7 @@ impl ServiceManager {
                 .inner
                 .bootstrap_vnic_allocator
                 .new_bootstrap()
+                .await
                 .map_err(Error::SwitchZoneVnicCreation)?;
             Ok(Some((link, self.inner.switch_zone_bootstrap_address)))
         } else {
@@ -1301,7 +1302,7 @@ impl ServiceManager {
     // physical links or vnics need to be mapped into the zone when it is
     // created. Returns a list of links, plus whether or not they need link
     // local addresses in the zone.
-    fn links_needed(
+    async fn links_needed(
        &self,
        zone_args: &ZoneArgs<'_>,
    ) -> Result<Vec<(Link, bool)>, Error> {
@@ -1317,7 +1318,12 @@ impl ServiceManager {
                 // The tfport service requires a MAC device to/from which
                 // sidecar packets may be multiplexed. If the link isn't
                 // present, don't bother trying to start the zone.
-                match self.inner.system_api.dladm().verify_link(pkt_source)
+                match self
+                    .inner
+                    .system_api
+                    .dladm()
+                    .verify_link(pkt_source)
+                    .await
                 {
                     Ok(link) => {
                         // It's important that tfpkt does **not** receive a
@@ -1342,6 +1348,7 @@ impl ServiceManager {
                     .system_api
                     .dladm()
                     .verify_link(&link.to_string())
+                    .await
                 {
                     Ok(link) => {
                         // Link local addresses should be created in the
@@ -1678,7 +1685,7 @@ impl ServiceManager {
     ) -> Result<RunningZone, Error> {
         let device_names = Self::devices_needed(&request)?;
         let (bootstrap_vnic, bootstrap_name_and_address) =
-            match self.bootstrap_address_needed(&request)? {
+            match self.bootstrap_address_needed(&request).await? {
                 Some((vnic, address)) => {
                     let name = vnic.name().to_string();
                     (Some(vnic), Some((name, address)))
@@ -1691,7 +1698,7 @@ impl ServiceManager {
         let links: Vec<Link>;
         let links_need_link_local: Vec<bool>;
         (links, links_need_link_local) =
-            self.links_needed(&request)?.into_iter().unzip();
+            self.links_needed(&request).await?.into_iter().unzip();
         let opte_ports = self.opte_ports_needed(&request).await?;
         let limit_priv = Self::privs_needed(&request);
@@ -4264,7 +4271,7 @@ impl ServiceManager {
                 ..Default::default()
             };
             filesystems.push(softnpu_filesystem);
-            data_links = Dladm::get_simulated_tfports()?;
+            data_links = Dladm::get_simulated_tfports().await?;
         }
         vec![
             SwitchService::Dendrite { asic },
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index 5b6d248a4ff..7071b1a1b4b 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -455,8 +455,10 @@ impl SledAgent {
         let etherstub = Dladm::ensure_etherstub(
             illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME,
         )
+        .await
         .map_err(|e| Error::Etherstub(e))?;
         let etherstub_vnic = Dladm::ensure_etherstub_vnic(&etherstub)
+            .await
             .map_err(|e| Error::EtherstubVnic(e))?;
 
         // Ensure the global zone has a functioning IPv6 address.
@@ -469,7 +471,7 @@ impl SledAgent {
             .map_err(|err| Error::SledSubnet { err })?;
 
         // Initialize the xde kernel driver with the underlay devices.
-        let underlay_nics = underlay::find_nics(&config.data_links)?;
+        let underlay_nics = underlay::find_nics(&config.data_links).await?;
         illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?;
 
         // Start collecting metric data.
@@ -485,7 +487,7 @@ impl SledAgent {
             MetricsManager::new(&log, identifiers.clone(), *sled_address.ip())?;
 
         // Start tracking the underlay physical links.
-        for link in underlay::find_chelsio_links(&config.data_links)? {
+        for link in underlay::find_chelsio_links(&config.data_links).await?
+        {
             match metrics_manager
                 .request_queue()
                 .track_physical("global", &link.0)
diff --git a/sled-hardware/src/cleanup.rs b/sled-hardware/src/cleanup.rs
index 88a0a8cbcc1..a597ca8fa8b 100644
--- a/sled-hardware/src/cleanup.rs
+++ b/sled-hardware/src/cleanup.rs
@@ -76,15 +76,15 @@ fn delete_addresses_matching_prefixes(
 }
 
 /// Delete the etherstub and underlay VNIC used for interzone communication
-pub fn delete_etherstub(log: &Logger) -> Result<(), ExecutionError> {
+pub async fn delete_etherstub(log: &Logger) -> Result<(), ExecutionError> {
     warn!(log, "Deleting Omicron underlay VNIC"; "vnic_name" => UNDERLAY_ETHERSTUB_VNIC_NAME);
-    Dladm::delete_etherstub_vnic(UNDERLAY_ETHERSTUB_VNIC_NAME)?;
+    Dladm::delete_etherstub_vnic(UNDERLAY_ETHERSTUB_VNIC_NAME).await?;
     warn!(log, "Deleting Omicron underlay etherstub"; "stub_name" => UNDERLAY_ETHERSTUB_NAME);
-    Dladm::delete_etherstub(UNDERLAY_ETHERSTUB_NAME)?;
+    Dladm::delete_etherstub(UNDERLAY_ETHERSTUB_NAME).await?;
     warn!(log, "Deleting Omicron bootstrap VNIC"; "vnic_name" => BOOTSTRAP_ETHERSTUB_VNIC_NAME);
-    Dladm::delete_etherstub_vnic(BOOTSTRAP_ETHERSTUB_VNIC_NAME)?;
+    Dladm::delete_etherstub_vnic(BOOTSTRAP_ETHERSTUB_VNIC_NAME).await?;
     warn!(log, "Deleting Omicron bootstrap etherstub"; "stub_name" => BOOTSTRAP_ETHERSTUB_NAME);
-    Dladm::delete_etherstub(BOOTSTRAP_ETHERSTUB_NAME)?;
+    Dladm::delete_etherstub(BOOTSTRAP_ETHERSTUB_NAME).await?;
     Ok(())
 }
 
@@ -92,22 +92,18 @@ pub fn delete_etherstub(log: &Logger) -> Result<(), ExecutionError> {
 ///
 /// These are currently those that match the prefix `ox` or `vopte`.
 pub async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> {
-    let vnics = Dladm::get_vnics()?;
+    let vnics = Dladm::get_vnics().await?;
     stream::iter(vnics)
         .zip(stream::iter(std::iter::repeat(log.clone())))
         .map(Ok::<_, illumos_utils::dladm::DeleteVnicError>)
-        .try_for_each_concurrent(None, |(vnic, log)| async {
-            tokio::task::spawn_blocking(move || {
-                warn!(
-                    log,
-                    "Deleting existing VNIC";
-                    "vnic_name" => &vnic,
-                    "vnic_kind" => ?LinkKind::from_name(&vnic).unwrap(),
-                );
-                Dladm::real_api().delete_vnic(&vnic)
-            })
-            .await
-            .unwrap()
+        .try_for_each_concurrent(None, |(vnic, log)| async move {
+            warn!(
+                log,
+                "Deleting existing VNIC";
+                "vnic_name" => &vnic,
+                "vnic_kind" => ?LinkKind::from_name(&vnic).unwrap(),
+            );
+            Dladm::real_api().delete_vnic(&vnic).await
         })
         .await?;
     Ok(())
@@ -117,7 +113,7 @@ pub async fn cleanup_networking_resources(log: &Logger) -> Result<(), Error> {
     delete_underlay_addresses(log)?;
     delete_bootstrap_addresses(log)?;
     delete_omicron_vnics(log).await?;
-    delete_etherstub(log)?;
+    delete_etherstub(log).await?;
     opte::delete_all_xde_devices(log)?;
 
     Ok(())
diff --git a/sled-hardware/src/underlay.rs b/sled-hardware/src/underlay.rs
index dbc0d57022a..d11db2bf51c 100644
--- a/sled-hardware/src/underlay.rs
+++ b/sled-hardware/src/underlay.rs
@@ -41,19 +41,20 @@ pub enum Error {
 /// Convenience function that calls
 /// `ensure_links_have_global_zone_link_local_v6_addresses()` with the links
 /// returned by `find_chelsio_links()`.
-pub fn find_nics(
+pub async fn find_nics(
     config_data_links: &[String; 2],
 ) -> Result<Vec<AddrObject>, Error> {
-    let underlay_nics = find_chelsio_links(config_data_links)?;
+    let underlay_nics = find_chelsio_links(config_data_links).await?;
 
     // Before these links have any consumers (eg. IP interfaces), set the MTU.
     // If we have previously set the MTU, do not attempt to re-set.
     const MTU: &str = "9000";
     for link in &underlay_nics {
-        let existing_mtu = Dladm::get_linkprop(&link.to_string(), "mtu")?;
+        let existing_mtu =
+            Dladm::get_linkprop(&link.to_string(), "mtu").await?;
         if existing_mtu != MTU {
-            Dladm::set_linkprop(&link.to_string(), "mtu", MTU)?;
+            Dladm::set_linkprop(&link.to_string(), "mtu", MTU).await?;
         }
     }
 
@@ -65,11 +66,11 @@ pub fn find_nics(
 /// For a real Gimlet, this should return the devices like `cxgbeN`. For a
 /// developer machine, or generally a non-Gimlet, this will return the
 /// VNICs we use to emulate those Chelsio links.
-pub fn find_chelsio_links(
+pub async fn find_chelsio_links(
     config_data_links: &[String; 2],
 ) -> Result<Vec<PhysicalLink>, Error> {
     if is_gimlet().map_err(Error::SystemDetection)? {
-        Dladm::list_physical().map_err(Error::FindLinks).map(|links| {
+        Dladm::list_physical().await.map_err(Error::FindLinks).map(|links| {
             links
                 .into_iter()
                 .filter(|link| link.0.starts_with(CHELSIO_LINK_PREFIX))
diff --git a/sled-hardware/types/src/underlay.rs b/sled-hardware/types/src/underlay.rs
index e285dd047c1..b384249ae80 100644
--- a/sled-hardware/types/src/underlay.rs
+++ b/sled-hardware/types/src/underlay.rs
@@ -30,11 +30,11 @@ impl BootstrapInterface {
 
     // TODO(https://github.com/oxidecomputer/omicron/issues/945): This address
     // could be randomly generated when it no longer needs to be durable.
-    pub fn ip(
+    pub async fn ip(
         self,
         link: &PhysicalLink,
     ) -> Result<Ipv6Addr, GetMacError> {
-        let mac = Dladm::get_mac(link)?;
+        let mac = Dladm::get_mac(link).await?;
         Ok(mac_to_bootstrap_ip(mac, self.interface_id()))
     }
 }

From 31cad18f12bc0b999511d06b847b13457c9f1f07 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Mon, 12 May 2025 16:52:56 -0700
Subject: [PATCH 2/5] Make more illumos commands async

---
 clickhouse-admin/src/context.rs           |   7 +-
 illumos-utils/src/fakes/zpool.rs          |   3 +-
 illumos-utils/src/fstyp.rs                |   9 +-
 illumos-utils/src/ipadm.rs                |  60 +++--
 illumos-utils/src/route.rs                |  52 ++--
 illumos-utils/src/running_zone.rs         |  15 +-
 illumos-utils/src/svcadm.rs               |  15 +-
 illumos-utils/src/zfs.rs                  | 243 ++++++++++--------
 illumos-utils/src/zone.rs                 | 102 ++++----
 illumos-utils/src/zpool.rs                |  50 ++--
 installinator/src/bootstrap.rs            |   2 +
 installinator/src/write.rs                |   2 +-
 package/src/bin/omicron-package.rs        |   8 +-
 .../src/dataset_serialization_task.rs     |  60 +----
 .../config-reconciler/src/dump_setup.rs   |   2 +
 sled-agent/src/backing_fs.rs              |   8 +-
 sled-agent/src/bootstrap/pre_server.rs    |  11 +-
 sled-agent/src/bootstrap/server.rs        |   3 +-
 sled-agent/src/long_running_tasks.rs      |   6 +-
 sled-agent/src/services.rs                |   4 +-
 sled-agent/src/sled_agent.rs              |   7 +-
 sled-agent/src/support_bundle/logs.rs     |   9 +-
 sled-agent/src/support_bundle/storage.rs  |   9 +-
 sled-agent/src/zone_bundle.rs             | 190 +++++------
 sled-diagnostics/src/logs.rs              | 128 +++++----
 sled-hardware/src/disk.rs                 |  28 +-
 sled-hardware/src/underlay.rs             |   6 +-
 sled-storage/src/dataset.rs               |  16 +-
 sled-storage/src/disk.rs                  |  21 +-
 sled-storage/src/manager.rs               |  14 +-
 sled-storage/src/manager_test_harness.rs  |   2 +-
 sled-storage/src/pool.rs                  |   7 +-
 zone-setup/src/bin/zone-setup.rs          |  34 ++-
 33 files changed, 623 insertions(+), 510 deletions(-)

diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs
index 36442a99700..307111647d8 100644
--- a/clickhouse-admin/src/context.rs
+++ b/clickhouse-admin/src/context.rs
@@ -290,7 +290,8 @@ async fn long_running_generate_config_task(
                 generation_tx.clone(),
                 clickward,
                 node_settings,
-            );
+            )
+            .await;
             if let Err(e) = response.send(result) {
                 error!(
                     &log,
@@ -324,7 +325,7 @@ impl NodeSettings {
     }
 }
 
-fn generate_config_and_enable_svc(
+async fn generate_config_and_enable_svc(
     generation_tx: watch::Sender<Option<Generation>>,
     clickward: Clickward,
     node_settings: NodeSettings,
@@ -364,7 +365,7 @@ fn generate_config_and_enable_svc(
     generation_tx.send_replace(Some(incoming_generation));
 
     // Once we have generated the client we can safely enable the clickhouse_server service
-    Svcadm::enable_service(node_settings.fmri())?;
+    Svcadm::enable_service(node_settings.fmri()).await?;
 
     Ok(output)
 }
diff --git a/illumos-utils/src/fakes/zpool.rs b/illumos-utils/src/fakes/zpool.rs
index 6e16933764f..baab8257f06 100644
--- a/illumos-utils/src/fakes/zpool.rs
+++ b/illumos-utils/src/fakes/zpool.rs
@@ -20,8 +20,9 @@ impl Zpool {
     }
 }
 
+#[async_trait::async_trait]
 impl Api for Zpool {
-    fn create(
+    async fn create(
         &self,
         _name: &ZpoolName,
         _vdev: &Utf8Path,
diff --git a/illumos-utils/src/fstyp.rs b/illumos-utils/src/fstyp.rs
index 1f56695e24a..d37b2a9add8 100644
--- a/illumos-utils/src/fstyp.rs
+++ b/illumos-utils/src/fstyp.rs
@@ -5,9 +5,10 @@
 //! Helper for calling fstyp.
 
 use crate::zpool::ZpoolName;
-use crate::{PFEXEC, execute};
+use crate::{PFEXEC, execute_async};
 use camino::Utf8Path;
 use std::str::FromStr;
+use tokio::process::Command;
 
 const FSTYP: &str = "/usr/sbin/fstyp";
 
@@ -32,14 +33,14 @@ pub struct Fstyp {}
 impl Fstyp {
     /// Executes the 'fstyp' command and parses the name of a zpool from it, if
     /// one exists.
-    pub fn get_zpool(path: &Utf8Path) -> Result<ZpoolName, Error> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn get_zpool(path: &Utf8Path) -> Result<ZpoolName, Error> {
+        let mut cmd = Command::new(PFEXEC);
         cmd.env_clear();
         cmd.env("LC_ALL", "C.UTF-8");
 
         let cmd = cmd.arg(FSTYP).arg("-a").arg(path);
-        let output = execute(cmd).map_err(Error::from)?;
+        let output = execute_async(cmd).await.map_err(Error::from)?;
 
         let stdout = String::from_utf8(output.stdout)?;
         let mut seen_zfs_marker = false;
diff --git a/illumos-utils/src/ipadm.rs b/illumos-utils/src/ipadm.rs
index ccfe41c72ae..b66c19ac4f2 100644
--- a/illumos-utils/src/ipadm.rs
+++ b/illumos-utils/src/ipadm.rs
@@ -6,9 +6,10 @@
 
 use crate::addrobj::{IPV6_LINK_LOCAL_ADDROBJ_NAME, IPV6_STATIC_ADDROBJ_NAME};
 use crate::zone::IPADM;
-use crate::{ExecutionError, PFEXEC, execute};
+use crate::{ExecutionError, PFEXEC, execute_async};
 use oxnet::IpNet;
 use std::net::{IpAddr, Ipv6Addr};
+use tokio::process::Command;
 
 /// Wraps commands for interacting with interfaces.
 pub struct Ipadm {}
@@ -34,12 +35,12 @@ pub enum AddrObjType {
 
 impl Ipadm {
     /// Ensure that an IP interface exists on the provided datalink.
-    pub fn ensure_ip_interface_exists(
+    pub async fn ensure_ip_interface_exists(
         datalink: &str,
     ) -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[IPADM, "create-if", "-t", datalink]);
-        match execute(cmd) {
+        match execute_async(cmd).await {
             Ok(_) => Ok(()),
             Err(ExecutionError::CommandFailure(info))
                 if info.stderr.contains(INTERFACE_ALREADY_EXISTS) =>
@@ -54,11 +55,11 @@ impl Ipadm {
     /// with the requested name already exists, return success. Note that in
     /// this case, the existing object is not checked to ensure it is
     /// consistent with the provided parameters.
-    pub fn ensure_ip_addrobj_exists(
+    pub async fn ensure_ip_addrobj_exists(
         addrobj: &str,
         addrtype: AddrObjType,
     ) -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[IPADM, "create-addr", "-t", "-T"]);
         let cmd = match addrtype {
             AddrObjType::DHCP => cmd.args(&["dhcp"]),
@@ -68,7 +69,7 @@ impl Ipadm {
             }
         };
         let cmd = cmd.arg(&addrobj);
-        match execute(cmd) {
+        match execute_async(cmd).await {
             Ok(_) => Ok(()),
             Err(ExecutionError::CommandFailure(info))
                 if info.stderr.contains(ADDROBJ_ALREADY_EXISTS) =>
@@ -97,14 +98,14 @@ impl Ipadm {
 
     /// Return the IP network associated with an address object, or None if
     /// there is no address object with this name.
-    pub fn addrobj_addr(
+    pub async fn addrobj_addr(
         addrobj: &str,
     ) -> Result<Option<IpNet>, ExecutionError> {
         // Note that additional privileges are not required to list address
         // objects, and so there is no `pfexec` here.
-        let mut cmd = std::process::Command::new(IPADM);
+        let mut cmd = Command::new(IPADM);
         let cmd = cmd.args(&["show-addr", "-po", "addr", addrobj]);
-        match execute(cmd) {
+        match execute_async(cmd).await {
             Err(ExecutionError::CommandFailure(info))
                 if [ADDROBJ_NOT_FOUND_ERR1, ADDROBJ_NOT_FOUND_ERR2]
                     .iter()
@@ -136,13 +137,15 @@ impl Ipadm {
     }
 
     /// Determine if a named address object exists
-    pub fn addrobj_exists(addrobj: &str) -> Result<bool, ExecutionError> {
-        Ok(Self::addrobj_addr(addrobj)?.is_some())
+    pub async fn addrobj_exists(addrobj: &str) -> Result<bool, ExecutionError> {
+        Ok(Self::addrobj_addr(addrobj).await?.is_some())
     }
 
     /// Set MTU to 9000 on both IPv4 and IPv6
-    pub fn set_interface_mtu(datalink: &str) -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn set_interface_mtu(
+        datalink: &str,
+    ) -> Result<(), ExecutionError> {
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[
             IPADM,
             "set-ifprop",
@@ -153,9 +156,9 @@ impl Ipadm {
             "ipv4",
             datalink,
         ]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
 
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[
             IPADM,
             "set-ifprop",
@@ -166,40 +169,41 @@ impl Ipadm {
             "ipv6",
             datalink,
         ]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(())
     }
 
-    pub fn create_static_and_autoconfigured_addrs(
+    pub async fn create_static_and_autoconfigured_addrs(
         datalink: &str,
         listen_addr: &Ipv6Addr,
     ) -> Result<(), ExecutionError> {
         // Create auto-configured address on the IP interface if it doesn't
         // already exist
         let addrobj = format!("{}/{}", datalink, IPV6_LINK_LOCAL_ADDROBJ_NAME);
-        Self::ensure_ip_addrobj_exists(&addrobj, AddrObjType::AddrConf)?;
+        Self::ensure_ip_addrobj_exists(&addrobj, AddrObjType::AddrConf).await?;
 
         // Create static address on the IP interface if it doesn't already exist
         let addrobj = format!("{}/{}", datalink, IPV6_STATIC_ADDROBJ_NAME);
         Self::ensure_ip_addrobj_exists(
             &addrobj,
             AddrObjType::Static((*listen_addr).into()),
-        )?;
+        )
+        .await?;
         Ok(())
     }
 
     // Create gateway on the IP interface if it doesn't already exist
-    pub fn create_opte_gateway(
+    pub async fn create_opte_gateway(
         opte_iface: &String,
     ) -> Result<(), ExecutionError> {
         let addrobj = format!("{}/public", opte_iface);
-        Self::ensure_ip_addrobj_exists(&addrobj, AddrObjType::DHCP)?;
+        Self::ensure_ip_addrobj_exists(&addrobj, AddrObjType::DHCP).await?;
         Ok(())
     }
 
     /// Set TCP recv_buf to 1 MB.
-    pub fn set_tcp_recv_buf() -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn set_tcp_recv_buf() -> Result<(), ExecutionError> {
+        let mut cmd = Command::new(PFEXEC);
 
         // This is to improve single-connection throughput on large uploads
         // from clients, e.g., images. Modern browsers will almost always use
@@ -216,14 +220,14 @@ impl Ipadm {
             "recv_buf=1000000",
             "tcp",
         ]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
 
         Ok(())
     }
 
     /// Set TCP congestion control algorithm to `cubic`.
-    pub fn set_tcp_congestion_control() -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn set_tcp_congestion_control() -> Result<(), ExecutionError> {
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[
             IPADM,
             "set-prop",
@@ -232,7 +236,7 @@ impl Ipadm {
             "congestion_control=cubic",
             "tcp",
         ]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
 
         Ok(())
     }
diff --git a/illumos-utils/src/route.rs b/illumos-utils/src/route.rs
index 835f1029322..a1f85820382 100644
--- a/illumos-utils/src/route.rs
+++ b/illumos-utils/src/route.rs
@@ -6,10 +6,12 @@
 
 use crate::zone::ROUTE;
 use crate::{
-    ExecutionError, PFEXEC, command_to_string, execute, output_to_exec_error,
+    ExecutionError, PFEXEC, command_to_string, execute_async,
+    output_to_exec_error,
 };
 use libc::ESRCH;
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
+use tokio::process::Command;
 
 /// Wraps commands for interacting with routing tables.
 pub struct Route {}
@@ -21,7 +23,7 @@ pub enum Gateway {
 }
 
 impl Route {
-    pub fn ensure_default_route_with_gateway(
+    pub async fn ensure_default_route_with_gateway(
         gateway: Gateway,
     ) -> Result<(), ExecutionError> {
         let inet;
@@ -38,37 +40,40 @@ impl Route {
         }
         // Add the desired route if it doesn't already exist
         let destination = "default";
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[ROUTE, "-n", "get", inet, destination, inet, &gw]);
 
-        let out =
-            cmd.output().map_err(|err| ExecutionError::ExecutionStart {
-                command: command_to_string(cmd),
+        let out = cmd.output().await.map_err(|err| {
+            ExecutionError::ExecutionStart {
+                command: command_to_string(cmd.as_std()),
                 err,
-            })?;
+            }
+        })?;
         match out.status.code() {
             Some(0) => (),
             // If the entry is not found in the table,
             // the exit status of the command will be 3 (ESRCH).
             // When that is the case, we'll add the route.
             Some(ESRCH) => {
-                let mut cmd = std::process::Command::new(PFEXEC);
+                let mut cmd = Command::new(PFEXEC);
                 let cmd =
                     cmd.args(&[ROUTE, "add", inet, destination, inet, &gw]);
-                execute(cmd)?;
+                execute_async(cmd).await?;
+            }
+            Some(_) | None => {
+                return Err(output_to_exec_error(cmd.as_std(), &out));
             }
-            Some(_) | None => return Err(output_to_exec_error(cmd, &out)),
         };
         Ok(())
     }
 
-    pub fn ensure_opte_route(
+    pub async fn ensure_opte_route(
         gateway: &Ipv4Addr,
         iface: &String,
         opte_ip: &IpAddr,
     ) -> Result<(), ExecutionError> {
         // Add the desired route if it doesn't already exist
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[
             ROUTE,
             "-n",
@@ -81,18 +86,19 @@ impl Route {
             &iface.to_string(),
         ]);
 
-        let out =
-            cmd.output().map_err(|err| ExecutionError::ExecutionStart {
-                command: command_to_string(cmd),
+        let out = cmd.output().await.map_err(|err| {
+            ExecutionError::ExecutionStart {
+                command: command_to_string(cmd.as_std()),
                 err,
-            })?;
+            }
+        })?;
         match out.status.code() {
             Some(0) => (),
             // If the entry is not found in the table,
             // the exit status of the command will be 3 (ESRCH).
             // When that is the case, we'll add the route.
             Some(ESRCH) => {
-                let mut cmd = std::process::Command::new(PFEXEC);
+                let mut cmd = Command::new(PFEXEC);
                 let cmd = cmd.args(&[
                     ROUTE,
                     "add",
@@ -103,19 +109,21 @@ impl Route {
                     "-ifp",
                     &iface.to_string(),
                 ]);
-                execute(cmd)?;
+                execute_async(cmd).await?;
+            }
+            Some(_) | None => {
+                return Err(output_to_exec_error(cmd.as_std(), &out));
             }
-            Some(_) | None => return Err(output_to_exec_error(cmd, &out)),
         };
         Ok(())
     }
 
-    pub fn add_bootstrap_route(
+    pub async fn add_bootstrap_route(
         bootstrap_prefix: u16,
         gz_bootstrap_addr: Ipv6Addr,
         zone_vnic_name: &str,
     ) -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[
             ROUTE,
             "add",
@@ -125,7 +133,7 @@ impl Route {
             "-ifp",
             zone_vnic_name,
         ]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(())
     }
 }
diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs
index 86d9acadaea..7f7b1cd66b5 100644
--- a/illumos-utils/src/running_zone.rs
+++ b/illumos-utils/src/running_zone.rs
@@ -535,7 +535,8 @@ impl RunningZone {
                 err,
             })?;
         let network =
-            Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?;
+            Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)
+                .await?;
         Ok(network)
     }
 
@@ -561,7 +562,8 @@ impl RunningZone {
         let zone = Some(self.inner.name.as_ref());
         if let IpAddr::V4(gateway) = port.gateway().ip() {
             let addr =
-                Zones::ensure_address(zone, &addrobj, AddressRequest::Dhcp)?;
+                Zones::ensure_address(zone, &addrobj, AddressRequest::Dhcp)
+                    .await?;
             // TODO-remove(#2931): OPTE's DHCP "server" returns the list of routes
             // to add via option 121 (Classless Static Route). The illumos DHCP
             // client currently does not support this option, so we add the routes
@@ -589,12 +591,12 @@ impl RunningZone {
         } else {
             // If the port is using IPv6 addressing we still want it to use
             // DHCP(v6) which requires first creating a link-local address.
-            Zones::ensure_has_link_local_v6_address(zone, &addrobj).map_err(
-                |err| EnsureAddressError::LinkLocal {
+            Zones::ensure_has_link_local_v6_address(zone, &addrobj)
+                .await
+                .map_err(|err| EnsureAddressError::LinkLocal {
                     zone: self.inner.name.clone(),
                     err,
-                },
-            )?;
+                })?;
 
             // Unlike DHCPv4, there's no blocking `ipadm` call we can
             // make as it just happens in the background. So we just poll
@@ -605,6 +607,7 @@ impl RunningZone {
                     // Grab all the address on the addrobj. There should
                     // always be at least one (the link-local we added)
                     let addrs = Zones::get_all_addresses(zone, &addrobj)
+                        .await
                         .map_err(|e| {
                             backoff::BackoffError::permanent(
                                 EnsureAddressError::from(e),
diff --git a/illumos-utils/src/svcadm.rs b/illumos-utils/src/svcadm.rs
index ed9b516fb99..cc68dc88997 100644
--- a/illumos-utils/src/svcadm.rs
+++ b/illumos-utils/src/svcadm.rs
@@ -5,23 +5,24 @@
 //! Utilities for manipulating SMF services.
 
 use crate::zone::SVCADM;
-use crate::{ExecutionError, PFEXEC, execute};
+use crate::{ExecutionError, PFEXEC, execute_async};
+use tokio::process::Command;
 
 /// Wraps commands for interacting with svcadm.
 pub struct Svcadm {}
 
 impl Svcadm {
-    pub fn refresh_logadm_upgrade() -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn refresh_logadm_upgrade() -> Result<(), ExecutionError> {
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[SVCADM, "refresh", "logadm-upgrade"]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(())
     }
 
-    pub fn enable_service(fmri: String) -> Result<(), ExecutionError> {
-        let mut cmd = std::process::Command::new(PFEXEC);
+    pub async fn enable_service(fmri: String) -> Result<(), ExecutionError> {
+        let mut cmd = Command::new(PFEXEC);
         let cmd = cmd.args(&[SVCADM, "enable", "-s", &fmri]);
-        execute(cmd)?;
+        execute_async(cmd).await?;
         Ok(())
     }
 }
diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs
index de9c235f397..e2c0f87a563 100644
--- a/illumos-utils/src/zfs.rs
+++ b/illumos-utils/src/zfs.rs
@@ -4,7 +4,7 @@
 
 //! Utilities for poking at ZFS.
 
-use crate::{PFEXEC, execute};
+use crate::{PFEXEC, execute_async};
 use anyhow::Context;
 use anyhow::anyhow;
 use anyhow::bail;
@@ -19,6 +19,8 @@
 use omicron_uuid_kinds::DatasetUuid;
 use std::collections::BTreeMap;
 use std::fmt;
+use tokio::process::Command;
+
 // These locations in the ramdisk must only be used by the switch zone.
 //
 // We need the switch zone online before we can create the U.2 drives and
@@ -596,7 +598,7 @@ pub struct DatasetEnsureArgs<'a> {
 //
 // If this function is called on a mountpoint which is already mounted, an error
 // is returned.
-fn ensure_empty_immutable_mountpoint(
+async fn ensure_empty_immutable_mountpoint(
     mountpoint: &Utf8Path,
 ) -> Result<(), MountpointError> {
     if mountpoint
@@ -605,7 +607,7 @@ fn ensure_empty_immutable_mountpoint(
     {
         // If the mountpoint exists, confirm nothing is already mounted
         // on it.
-        let mut command = std::process::Command::new(ZFS);
+        let mut command = Command::new(ZFS);
         let cmd = command.args(&[
             "get",
             "-Hpo",
@@ -613,8 +615,9 @@ fn ensure_empty_immutable_mountpoint(
             "mountpoint",
             mountpoint.as_str(),
         ]);
-        let output =
-            execute(cmd).map_err(|err| MountpointError::CheckMounted(err))?;
+        let output = execute_async(cmd)
+            .await
+            .map_err(|err| MountpointError::CheckMounted(err))?;
         let stdout = String::from_utf8_lossy(&output.stdout);
         let Some(dir_mountpoint) = stdout.trim().lines().next() else {
             return Err(MountpointError::CheckMountedParse(stdout.to_string()));
@@ -651,7 +654,7 @@ fn ensure_empty_immutable_mountpoint(
     //
     // - If necessary, we make the directory mutable, and try to clear it out.
     // - We then re-set the directory to immutable.
-    let immutablity = is_directory_immutable(mountpoint)?;
+    let immutablity = is_directory_immutable(mountpoint).await?;
 
     if is_directory_empty(mountpoint)?
         && immutablity.as_immutable_as_filesystem_allows()
     {
         return Ok(());
     }
 
     if immutablity.can_set_immutable() {
-        make_directory_mutable(mountpoint)?;
+        make_directory_mutable(mountpoint).await?;
     }
     ensure_mountpoint_empty(mountpoint)?;
     if immutablity.can_set_immutable() {
-        make_directory_immutable(mountpoint)?;
+        make_directory_immutable(mountpoint).await?;
     }
 
     // This concurrent error case is a bit exceptional: we briefly made the
@@ -723,17 +726,25 @@ fn ensure_mountpoint_empty(path: &Utf8Path) -> Result<(), MountpointError> {
     Ok(())
 }
 
-fn make_directory_immutable(path: &Utf8Path) -> Result<(), MountpointError> {
-    let mut command = std::process::Command::new(PFEXEC);
+async fn make_directory_immutable(
+    path: &Utf8Path,
+) -> Result<(), MountpointError> {
+    let mut command = Command::new(PFEXEC);
     let cmd = command.args(&["chmod", "S+ci", path.as_str()]);
-    execute(cmd).map_err(|err| MountpointError::MakeImmutable(err))?;
+    execute_async(cmd)
+        .await
+        .map_err(|err| MountpointError::MakeImmutable(err))?;
     Ok(())
 }
 
-fn make_directory_mutable(path: &Utf8Path) -> Result<(), MountpointError> {
-    let mut command = std::process::Command::new(PFEXEC);
+async fn make_directory_mutable(
+    path: &Utf8Path,
+) -> Result<(), MountpointError> {
+    let mut command = Command::new(PFEXEC);
     let cmd = command.args(&["chmod", "S-ci", path.as_str()]);
-    execute(cmd).map_err(|err| MountpointError::MakeMutable(err))?;
+    execute_async(cmd)
+        .await
+        .map_err(|err| MountpointError::MakeMutable(err))?;
     Ok(())
 }
 
@@ -762,13 +773,14 @@ impl Immutability {
     }
 }
 
-fn is_directory_immutable(
+async fn is_directory_immutable(
     path: &Utf8Path,
 ) -> Result<Immutability, MountpointError> {
-    let mut command = std::process::Command::new(PFEXEC);
+    let mut command = Command::new(PFEXEC);
     let cmd = command.args(&["ls", "-d/v", path.as_str()]);
-    let output =
-        execute(cmd).map_err(|err| MountpointError::MakeImmutable(err))?;
+    let output = execute_async(cmd)
+        .await
+        .map_err(|err| MountpointError::MakeImmutable(err))?;
 
     // NOTE: Experimenting with "truss ls -d/v" shows that it seems to be using
     // the https://illumos.org/man/2/acl API, but we will need to likely bring
@@ -821,11 +833,14 @@ impl Zfs {
     /// Lists all datasets within a pool or existing dataset.
     ///
     /// Strips the input `name` from the output dataset names.
-    pub fn list_datasets(name: &str) -> Result<Vec<String>, ListDatasetsError> {
-        let mut command = std::process::Command::new(ZFS);
+    pub async fn list_datasets(
+        name: &str,
+    ) -> Result<Vec<String>, ListDatasetsError> {
+        let mut command = Command::new(ZFS);
         let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]);
 
-        let output = execute(cmd)
+        let output = execute_async(cmd)
+            .await
             .map_err(|err| ListDatasetsError { name: name.to_string(), err })?;
         let stdout = String::from_utf8_lossy(&output.stdout);
         let filesystems: Vec<String> = stdout
@@ -848,11 +863,11 @@ impl Zfs {
     /// substantial results about the datasets found.
     ///
     /// Sorts results and de-duplicates them by name.
-    pub fn get_dataset_properties(
+    pub async fn get_dataset_properties(
         datasets: &[String],
         which: WhichDatasets,
     ) -> Result<Vec<DatasetProperties>, anyhow::Error> {
-        let mut command = std::process::Command::new(ZFS);
+        let mut command = Command::new(ZFS);
         let cmd = command.arg("get");
 
         // Restrict to just filesystems and volumes to be consistent with
@@ -879,7 +894,7 @@ impl Zfs {
         //
         // If one or more dataset doesn't exist, we can still read stdout to
        // see about the ones that do exist.
-        let output = cmd.output().map_err(|err| {
+        let output = cmd.output().await.map_err(|err| {
             anyhow!(
                 "Failed to get dataset properties for {datasets:?}: {err:?}"
             )
@@ -893,10 +908,13 @@ impl Zfs {
     ///
     /// The object can either be a dataset name, or a path, in which case it
     /// will be resolved to the _mounted_ ZFS dataset containing that path.
-    pub fn get_dataset_name(object: &str) -> Result<String, ExecutionError> {
-        let mut command = std::process::Command::new(ZFS);
+    pub async fn get_dataset_name(
+        object: &str,
+    ) -> Result<String, ExecutionError> {
+        let mut command = Command::new(ZFS);
         let cmd = command.args(&["get", "-Hpo", "value", "name", object]);
-        execute(cmd)
+        execute_async(cmd)
+            .await
             .map(|output| {
                 String::from_utf8_lossy(&output.stdout).trim().to_string()
             })
@@ -904,10 +922,12 @@ impl Zfs {
     }
 
     /// Destroys a dataset.
-    pub fn destroy_dataset(name: &str) -> Result<(), DestroyDatasetError> {
-        let mut command = std::process::Command::new(PFEXEC);
+    pub async fn destroy_dataset(
+        name: &str,
+    ) -> Result<(), DestroyDatasetError> {
+        let mut command = Command::new(PFEXEC);
         let cmd = command.args(&[ZFS, "destroy", "-r", name]);
-        execute(cmd).map_err(|err| {
+        execute_async(cmd).await.map_err(|err| {
             let variant = match err {
                 crate::ExecutionError::CommandFailure(info)
                     if info.stderr.contains("does not exist") =>
@@ -924,59 +944,58 @@ impl Zfs {
     /// Ensures that a ZFS dataset is mounted
     ///
     /// Returns an error if the dataset exists, but cannot be mounted.
-    pub fn ensure_dataset_mounted_and_exists(
+    pub async fn ensure_dataset_mounted_and_exists(
         name: &str,
         mountpoint: &Mountpoint,
     ) -> Result<(), EnsureDatasetError> {
         Self::ensure_dataset_mounted_and_exists_inner(name, mountpoint)
-            .map_err(|err| EnsureDatasetError {
-                name: name.to_string(),
-                err,
-            })?;
+            .await
+            .map_err(|err| EnsureDatasetError { name: name.to_string(), err })?;
         Ok(())
     }
 
-    fn ensure_dataset_mounted_and_exists_inner(
+    async fn ensure_dataset_mounted_and_exists_inner(
        name: &str,
        mountpoint: &Mountpoint,
    ) -> Result<(), EnsureDatasetErrorRaw> {
-        let mount_info = Self::dataset_exists(name, mountpoint)?;
+        let mount_info = Self::dataset_exists(name, mountpoint).await?;
         if !mount_info.exists {
             return Err(EnsureDatasetErrorRaw::DoesNotExist);
         }
 
         if !mount_info.mounted {
-            Self::ensure_dataset_mounted(name, mountpoint)?;
+            Self::ensure_dataset_mounted(name, mountpoint).await?;
         }
         return Ok(());
     }
 
-    fn ensure_dataset_mounted(
+    async fn ensure_dataset_mounted(
         name: &str,
         mountpoint: &Mountpoint,
     ) -> Result<(), EnsureDatasetErrorRaw> {
-        ensure_empty_immutable_mountpoint(&mountpoint.0).map_err(|err| {
-            EnsureDatasetErrorRaw::MountpointCreation {
+        ensure_empty_immutable_mountpoint(&mountpoint.0).await.map_err(
+            |err| EnsureDatasetErrorRaw::MountpointCreation {
                 mountpoint: mountpoint.0.to_path_buf(),
                 err,
-            }
-        })?;
-        Self::mount_dataset(name)?;
+            },
+        )?;
+        Self::mount_dataset(name).await?;
         Ok(())
     }
 
     /// Creates a new ZFS dataset unless one already exists.
     ///
     /// Refer to [DatasetEnsureArgs] for details on the supplied arguments.
- pub fn ensure_dataset( - args: DatasetEnsureArgs, + pub async fn ensure_dataset( + args: DatasetEnsureArgs<'_>, ) -> Result<(), EnsureDatasetError> { let name = args.name.to_string(); Self::ensure_dataset_inner(args) + .await .map_err(|err| EnsureDatasetError { name, err }) } - fn ensure_dataset_inner( + async fn ensure_dataset_inner( DatasetEnsureArgs { name, mountpoint, @@ -986,9 +1005,9 @@ impl Zfs { size_details, id, additional_options, - }: DatasetEnsureArgs, + }: DatasetEnsureArgs<'_>, ) -> Result<(), EnsureDatasetErrorRaw> { - let dataset_info = Self::dataset_exists(name, &mountpoint)?; + let dataset_info = Self::dataset_exists(name, &mountpoint).await?; // Non-zoned datasets with an explicit mountpoint and the // "canmount=on" property should be mounted within the global zone. @@ -1004,10 +1023,11 @@ impl Zfs { // have changed, and ensure it has been mounted if it needs // to be mounted. Self::set_values(name, props.as_slice()) + .await .map_err(|err| EnsureDatasetErrorRaw::from(err.err))?; if wants_mounting { - Self::ensure_dataset_mounted(name, &mountpoint)?; + Self::ensure_dataset_mounted(name, &mountpoint).await?; } return Ok(()); @@ -1019,7 +1039,7 @@ impl Zfs { // creating the dataset itself, which will also mount it. if wants_mounting { let path = &mountpoint.0; - ensure_empty_immutable_mountpoint(&path).map_err(|err| { + ensure_empty_immutable_mountpoint(&path).await.map_err(|err| { EnsureDatasetErrorRaw::MountpointCreation { mountpoint: path.to_path_buf(), err, @@ -1027,7 +1047,7 @@ impl Zfs { })?; } - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let cmd = command.args(&[ZFS, "create"]); if zoned { cmd.args(&["-o", "zoned=on"]); @@ -1061,37 +1081,45 @@ impl Zfs { cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]); - execute(cmd).map_err(|err| EnsureDatasetErrorRaw::from(err))?; + execute_async(cmd) + .await + .map_err(|err| EnsureDatasetErrorRaw::from(err))?; // We ensure that the currently running process has the ability to // act on the underlying mountpoint. if !zoned { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let user = whoami::username(); let mount = format!("{mountpoint}"); let cmd = command.args(["chown", "-R", &user, &mount]); - execute(cmd).map_err(|err| EnsureDatasetErrorRaw::from(err))?; + execute_async(cmd) + .await + .map_err(|err| EnsureDatasetErrorRaw::from(err))?; } Self::set_values(name, props.as_slice()) + .await .map_err(|err| EnsureDatasetErrorRaw::from(err.err))?; Ok(()) } // Mounts a dataset, loading keys if necessary. 
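    // ("Loading keys" refers to the `-l` flag passed to `zfs mount` below,
    // which loads the dataset's encryption key, if one is required, before
    // attempting the mount.)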
- fn mount_dataset(name: &str) -> Result<(), EnsureDatasetErrorRaw> { - let mut command = std::process::Command::new(PFEXEC); + async fn mount_dataset(name: &str) -> Result<(), EnsureDatasetErrorRaw> { + let mut command = Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-l", name]); - execute(cmd) + execute_async(cmd) + .await .map_err(|err| EnsureDatasetErrorRaw::MountFsFailed(err))?; Ok(()) } - pub fn mount_overlay_dataset(name: &str) -> Result<(), EnsureDatasetError> { - let mut command = std::process::Command::new(PFEXEC); + pub async fn mount_overlay_dataset( + name: &str, + ) -> Result<(), EnsureDatasetError> { + let mut command = Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-O", name]); - execute(cmd).map_err(|err| EnsureDatasetError { + execute_async(cmd).await.map_err(|err| EnsureDatasetError { name: name.to_string(), err: EnsureDatasetErrorRaw::MountOverlayFsFailed(err), })?; @@ -1100,11 +1128,11 @@ impl Zfs { // Return (true, mounted) if the dataset exists, (false, false) otherwise, // where mounted is if the dataset is mounted. - fn dataset_exists( + async fn dataset_exists( name: &str, mountpoint: &Mountpoint, ) -> Result { - let mut command = std::process::Command::new(ZFS); + let mut command = Command::new(ZFS); let cmd = command.args(&[ "list", "-Hpo", @@ -1112,7 +1140,7 @@ impl Zfs { name, ]); // If the list command returns any valid output, validate it. - if let Ok(output) = execute(cmd) { + if let Ok(output) = execute_async(cmd).await { let stdout = String::from_utf8_lossy(&output.stdout); let values: Vec<&str> = stdout.trim().split('\t').collect(); if &values[..3] != &[name, "filesystem", &mountpoint.to_string()] { @@ -1126,23 +1154,23 @@ impl Zfs { } /// Set the value of an Oxide-managed ZFS property. - pub fn set_oxide_value( + pub async fn set_oxide_value( filesystem_name: &str, name: &str, value: &str, ) -> Result<(), SetValueError> { - Zfs::set_value(filesystem_name, &format!("oxide:{}", name), value) + Zfs::set_value(filesystem_name, &format!("oxide:{}", name), value).await } - fn set_value( + async fn set_value( filesystem_name: &str, name: &str, value: &str, ) -> Result<(), SetValueError> { - Self::set_values(filesystem_name, &[(name, value)]) + Self::set_values(filesystem_name, &[(name, value)]).await } - fn set_values( + async fn set_values( filesystem_name: &str, name_values: &[(K, V)], ) -> Result<(), SetValueError> { @@ -1150,13 +1178,13 @@ impl Zfs { return Ok(()); } - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let cmd = command.args(&[ZFS, "set"]); for (name, value) in name_values { cmd.arg(format!("{name}={value}")); } cmd.arg(filesystem_name); - execute(cmd).map_err(|err| SetValueError { + execute_async(cmd).await.map_err(|err| SetValueError { filesystem: filesystem_name.to_string(), values: name_values .iter() @@ -1168,7 +1196,7 @@ impl Zfs { } /// Get the value of an Oxide-managed ZFS property. - pub fn get_oxide_value( + pub async fn get_oxide_value( filesystem_name: &str, name: &str, ) -> Result { @@ -1177,24 +1205,26 @@ impl Zfs { filesystem_name, &[&property], Some(PropertySource::Local), - )?; + ) + .await?; Ok(value) } /// Calls "zfs get" with a single value - pub fn get_value( + pub async fn get_value( filesystem_name: &str, name: &str, ) -> Result { - let [value] = Self::get_values(filesystem_name, &[name], None)?; + let [value] = Self::get_values(filesystem_name, &[name], None).await?; Ok(value) } /// List all extant snapshots. 
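    // Snapshot names reported by `zfs list -t snapshot` have the form
    // `<filesystem>@<snap_name>`; a minimal parse of one output line,
    // assuming that shape (illustrative, not the crate's parser):
    //
    //     let (filesystem, snap_name) = line.split_once('@')?;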
- pub fn list_snapshots() -> Result, ListSnapshotsError> { - let mut command = std::process::Command::new(ZFS); + pub async fn list_snapshots() -> Result, ListSnapshotsError> { + let mut command = Command::new(ZFS); let cmd = command.args(&["list", "-H", "-o", "name", "-t", "snapshot"]); - execute(cmd) + execute_async(cmd) + .await .map(|output| { let stdout = String::from_utf8_lossy(&output.stdout); stdout @@ -1217,36 +1247,40 @@ impl Zfs { /// /// A list of properties, as name-value tuples, may be passed to this /// method, for creating properties directly on the snapshots. - pub fn create_snapshot<'a>( + pub async fn create_snapshot<'a>( filesystem: &'a str, snap_name: &'a str, properties: &'a [(&'a str, &'a str)], ) -> Result<(), CreateSnapshotError> { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let mut cmd = command.args([ZFS, "snapshot"]); for (name, value) in properties.iter() { cmd = cmd.arg("-o").arg(&format!("{name}={value}")); } cmd.arg(&format!("{filesystem}@{snap_name}")); - execute(cmd).map(|_| ()).map_err(|err| CreateSnapshotError { - filesystem: filesystem.to_string(), - snap_name: snap_name.to_string(), - err, + execute_async(cmd).await.map(|_| ()).map_err(|err| { + CreateSnapshotError { + filesystem: filesystem.to_string(), + snap_name: snap_name.to_string(), + err, + } }) } /// Destroy a named snapshot of a filesystem. - pub fn destroy_snapshot( + pub async fn destroy_snapshot( filesystem: &str, snap_name: &str, ) -> Result<(), DestroySnapshotError> { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let path = format!("{filesystem}@{snap_name}"); let cmd = command.args(&[ZFS, "destroy", &path]); - execute(cmd).map(|_| ()).map_err(|err| DestroySnapshotError { - filesystem: filesystem.to_string(), - snap_name: snap_name.to_string(), - err, + execute_async(cmd).await.map(|_| ()).map_err(|err| { + DestroySnapshotError { + filesystem: filesystem.to_string(), + snap_name: snap_name.to_string(), + err, + } }) } @@ -1255,12 +1289,12 @@ impl Zfs { /// - `names`: The properties being acquired /// - `source`: The optioanl property source (origin of the property) /// Defaults to "all sources" when unspecified. - pub fn get_values( + pub async fn get_values( filesystem_name: &str, names: &[&str; N], source: Option, ) -> Result<[String; N], GetValueError> { - let mut cmd = std::process::Command::new(PFEXEC); + let mut cmd = Command::new(PFEXEC); let all_names = names .into_iter() .map(|n| match *n { @@ -1280,11 +1314,12 @@ impl Zfs { } cmd.arg(&all_names); cmd.arg(filesystem_name); - let output = execute(&mut cmd).map_err(|err| GetValueError { - filesystem: filesystem_name.to_string(), - name: format!("{:?}", names), - err: err.into(), - })?; + let output = + execute_async(&mut cmd).await.map_err(|err| GetValueError { + filesystem: filesystem_name.to_string(), + name: format!("{:?}", names), + err: err.into(), + })?; let stdout = String::from_utf8_lossy(&output.stdout); let values = stdout.trim(); @@ -1322,12 +1357,12 @@ impl Snapshot { /// "legacy/.zfs/snapshot/" because the mountpoint of the dataset /// is a "legacy" mount. Additionally a fileystem with no mountpoint will /// have a zfs mountpoint property of "-". - pub fn full_path(&self) -> Result { + pub async fn full_path(&self) -> Result { // TODO (omicron#8023): // When a mountpoint is returned as "legacy" we could go fish around in // "/etc/mnttab". 
That would probably mean making this function return a // result of an option. - let mountpoint = Zfs::get_value(&self.filesystem, "mountpoint")?; + let mountpoint = Zfs::get_value(&self.filesystem, "mountpoint").await?; Ok(Utf8PathBuf::from(mountpoint) .join(format!(".zfs/snapshot/{}", self.snap_name))) } @@ -1340,18 +1375,19 @@ impl fmt::Display for Snapshot { } /// Returns all datasets managed by Omicron -pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result> { +pub async fn get_all_omicron_datasets_for_delete() -> anyhow::Result> +{ let mut datasets = vec![]; // Collect all datasets within Oxide zpools. // // This includes cockroachdb, clickhouse, and crucible datasets. - let zpools = crate::zpool::Zpool::list()?; + let zpools = crate::zpool::Zpool::list().await?; for pool in &zpools { let internal = pool.kind() == omicron_common::zpool_name::ZpoolKind::Internal; let pool = pool.to_string(); - for dataset in &Zfs::list_datasets(&pool)? { + for dataset in &Zfs::list_datasets(&pool).await? { // Avoid erasing crashdump, backing data and swap datasets on // internal pools. The swap device may be in use. if internal @@ -1366,7 +1402,8 @@ pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result> { } // Collect all datasets for ramdisk-based Oxide zones, if any exist. - if let Ok(ramdisk_datasets) = Zfs::list_datasets(&ZONE_ZFS_RAMDISK_DATASET) + if let Ok(ramdisk_datasets) = + Zfs::list_datasets(&ZONE_ZFS_RAMDISK_DATASET).await { for dataset in &ramdisk_datasets { datasets.push(format!("{}/{dataset}", ZONE_ZFS_RAMDISK_DATASET)); diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 8c79185ecb9..c66c214a9ef 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -11,12 +11,13 @@ use ipnetwork::IpNetworkError; use slog::Logger; use slog::info; use std::net::{IpAddr, Ipv6Addr}; +use tokio::process::Command; use crate::ExecutionError; use crate::addrobj::AddrObject; use crate::dladm::{EtherstubVnic, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL}; use crate::zpool::PathInPool; -use crate::{PFEXEC, execute}; +use crate::{PFEXEC, execute_async}; use omicron_common::address::SLED_PREFIX; use omicron_uuid_kinds::OmicronZoneUuid; @@ -479,10 +480,10 @@ impl Zones { } /// Returns the name of the VNIC used to communicate with the control plane. - pub fn get_control_interface( + pub async fn get_control_interface( zone: &str, ) -> Result { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let cmd = command.args(&[ ZLOGIN, zone, @@ -492,7 +493,7 @@ impl Zones { "-o", "LINK", ]); - let output = execute(cmd).map_err(|err| { + let output = execute_async(cmd).await.map_err(|err| { GetControlInterfaceError::Execution { zone: zone.to_string(), err } })?; String::from_utf8_lossy(&output.stdout) @@ -510,10 +511,10 @@ impl Zones { } /// Returns the name of the VNIC used to communicate with the bootstrap network. - pub fn get_bootstrap_interface( + pub async fn get_bootstrap_interface( zone: &str, ) -> Result, GetBootstrapInterfaceError> { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let cmd = command.args(&[ ZLOGIN, zone, @@ -523,7 +524,7 @@ impl Zones { "-o", "LINK", ]); - let output = execute(cmd).map_err(|err| { + let output = execute_async(cmd).await.map_err(|err| { GetBootstrapInterfaceError::Execution { zone: zone.to_string(), err, @@ -557,13 +558,14 @@ impl Zones { /// This address may be optionally within a zone `zone`. 
/// If `None` is supplied, the address is queried from the Global Zone. #[allow(clippy::needless_lifetimes)] - pub fn ensure_address<'a>( + pub async fn ensure_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, ) -> Result { - |zone, addrobj, addrtype| -> Result { - match Self::get_address_impl(zone, addrobj) { + #[allow(clippy::redundant_closure_call)] + async |zone, addrobj, addrtype| -> Result { + match Self::get_address_impl(zone, addrobj).await { Ok(addr) => { if let AddressRequest::Static(expected_addr) = addrtype { // If the address is static, we need to validate that it @@ -572,19 +574,23 @@ impl Zones { // If the address doesn't match, try removing the old // value before using the new one. Self::delete_address(zone, addrobj) + .await .map_err(|e| anyhow!(e))?; return Self::create_address( zone, addrobj, addrtype, ) + .await .map_err(|e| anyhow!(e)); } } Ok(addr) } Err(_) => Self::create_address(zone, addrobj, addrtype) + .await .map_err(|e| anyhow!(e)), } }(zone, addrobj, addrtype) + .await .map_err(|err| EnsureAddressError { zone: zone.unwrap_or("global").to_string(), request: addrtype, @@ -598,14 +604,16 @@ impl Zones { /// This `addrobj` may optionally be within a zone named `zone`. /// If `None` is supplied, the address is queried from the Global Zone. #[allow(clippy::needless_lifetimes)] - pub fn get_address<'a>( + pub async fn get_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result { - Self::get_address_impl(zone, addrobj).map_err(|err| GetAddressError { - zone: zone.unwrap_or("global").to_string(), - name: addrobj.clone(), - err: anyhow!(err), + Self::get_address_impl(zone, addrobj).await.map_err(|err| { + GetAddressError { + zone: zone.unwrap_or("global").to_string(), + name: addrobj.clone(), + err: anyhow!(err), + } }) } @@ -614,11 +622,11 @@ impl Zones { /// This address may optionally be within a zone named `zone`. /// If `None` is supplied, the address is queried from the Global Zone. #[allow(clippy::needless_lifetimes)] - fn get_address_impl<'a>( + async fn get_address_impl<'a>( zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -629,7 +637,7 @@ impl Zones { args.extend(&[IPADM, "show-addr", "-p", "-o", "ADDR", &addrobj_str]); let cmd = command.args(args); - let output = execute(cmd)?; + let output = execute_async(cmd).await?; String::from_utf8_lossy(&output.stdout) .lines() .find_map(|s| parse_ip_network(s).ok()) @@ -641,11 +649,11 @@ impl Zones { /// This `addrobj` may optionally be within a zone named `zone`. /// If `None` is supplied, the address is queried from the Global Zone. 
#[allow(clippy::needless_lifetimes)] - pub fn get_all_addresses<'a>( + pub async fn get_all_addresses<'a>( zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result, GetAddressesError> { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -656,11 +664,12 @@ impl Zones { args.extend(&[IPADM, "show-addr", "-p", "-o", "ADDR", &addrobj_str]); let cmd = command.args(args); - let output = execute(cmd).map_err(|err| GetAddressesError { - zone: zone.unwrap_or("global").to_string(), - name: addrobj.clone(), - err: err.into(), - })?; + let output = + execute_async(cmd).await.map_err(|err| GetAddressesError { + zone: zone.unwrap_or("global").to_string(), + name: addrobj.clone(), + err: err.into(), + })?; Ok(String::from_utf8_lossy(&output.stdout) .lines() .filter_map(|s| s.parse().ok()) @@ -672,11 +681,11 @@ impl Zones { /// Zone may either be `Some(zone)` for a non-global zone, or `None` to /// run the command in the Global zone. #[allow(clippy::needless_lifetimes)] - fn has_link_local_v6_address<'a>( + async fn has_link_local_v6_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result<(), Error> { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let prefix = if let Some(zone) = zone { vec![ZLOGIN, zone] } else { vec![] }; @@ -687,7 +696,7 @@ impl Zones { let args = prefix.iter().chain(show_addr_args); let cmd = command.args(args); - let output = execute(cmd)?; + let output = execute_async(cmd).await?; if let Some(_) = String::from_utf8_lossy(&output.stdout) .lines() .find(|s| s.trim() == "addrconf") @@ -701,12 +710,12 @@ impl Zones { // // Does NOT check if the address already exists. #[allow(clippy::needless_lifetimes)] - pub fn create_address_internal<'a>( + pub async fn create_address_internal<'a>( zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, ) -> Result<(), crate::ExecutionError> { - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { args.push(ZLOGIN.to_string()); @@ -732,7 +741,7 @@ impl Zones { args.push(addrobj.to_string()); let cmd = command.args(args); - execute(cmd)?; + execute_async(cmd).await?; Ok(()) } @@ -742,7 +751,7 @@ impl Zones { /// This method attempts to be idempotent: deleting a nonexistent address /// object returns `Ok(())`. 
#[allow(clippy::needless_lifetimes)] - pub fn delete_address<'a>( + pub async fn delete_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result<(), DeleteAddressError> { @@ -752,7 +761,7 @@ impl Zones { const OBJECT_NOT_FOUND: &str = "could not delete address: Object not found"; - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { args.push(ZLOGIN.to_string()); @@ -764,7 +773,7 @@ impl Zones { args.push(addrobj.to_string()); let cmd = command.args(args); - match execute(cmd) { + match execute_async(cmd).await { Ok(_) => Ok(()), Err(ExecutionError::CommandFailure(err)) if err.stderr.contains(OBJECT_NOT_FOUND) => @@ -787,16 +796,16 @@ impl Zones { /// For more context, see: /// #[allow(clippy::needless_lifetimes)] - pub fn ensure_has_link_local_v6_address<'a>( + pub async fn ensure_has_link_local_v6_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result<(), crate::ExecutionError> { - if let Ok(()) = Self::has_link_local_v6_address(zone, &addrobj) { + if let Ok(()) = Self::has_link_local_v6_address(zone, &addrobj).await { return Ok(()); } // No link-local address was found, attempt to make one. - let mut command = std::process::Command::new(PFEXEC); + let mut command = Command::new(PFEXEC); let prefix = if let Some(zone) = zone { vec![ZLOGIN, zone] } else { vec![] }; @@ -812,7 +821,7 @@ impl Zones { let args = prefix.iter().chain(create_addr_args); let cmd = command.args(args); - execute(cmd)?; + execute_async(cmd).await?; Ok(()) } @@ -821,20 +830,22 @@ impl Zones { // from RSS. Edit to this TODO: we still need this for the bootstrap network // (which exists pre-RSS), but we should remove all uses of it other than // the bootstrap agent. - pub fn ensure_has_global_zone_v6_address( + pub async fn ensure_has_global_zone_v6_address( link: EtherstubVnic, address: Ipv6Addr, name: &str, ) -> Result<(), EnsureGzAddressError> { // Call the guts of this function within a closure to make it easier // to wrap the error with appropriate context. - |link: EtherstubVnic, address, name| -> Result<(), anyhow::Error> { + #[allow(clippy::redundant_closure_call)] + async |link: EtherstubVnic, address, name| -> Result<(), anyhow::Error> { let gz_link_local_addrobj = AddrObject::link_local(&link.0) .map_err(|err| anyhow!(err))?; Self::ensure_has_link_local_v6_address( None, &gz_link_local_addrobj, ) + .await .map_err(|err| anyhow!(err))?; // Ensure that a static IPv6 address has been allocated to the @@ -853,9 +864,11 @@ impl Zones { Some(omicron_common::address::SLED_PREFIX), ), ) + .await .map_err(|err| anyhow!(err))?; Ok(()) }(link.clone(), address, name) + .await .map_err(|err| EnsureGzAddressError { address: IpAddr::V6(address), link: link.0.clone(), @@ -883,7 +896,7 @@ impl Zones { // Creates an IP address within a Zone. #[allow(clippy::needless_lifetimes)] - fn create_address<'a>( + async fn create_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, @@ -905,16 +918,17 @@ impl Zones { Self::ensure_has_link_local_v6_address( Some(zone), &link_local_addrobj, - )?; + ) + .await?; } } } }; // Actually perform address allocation. 
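        // (As the match above shows, a link-local address is ensured first
        // only for static IPv6 requests inside a zone: the interface must
        // already be plumbed for IPv6, which the link-local address
        // accomplishes, before `ipadm` can add the static address below.)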
- Self::create_address_internal(zone, addrobj, addrtype)?; + Self::create_address_internal(zone, addrobj, addrtype).await?; - Self::get_address_impl(zone, addrobj) + Self::get_address_impl(zone, addrobj).await } } diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 1a5e113b14e..6fee2596d29 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -4,9 +4,10 @@ //! Utilities for managing Zpools. -use crate::{ExecutionError, PFEXEC, execute}; +use crate::{ExecutionError, PFEXEC, execute_async}; use camino::{Utf8Path, Utf8PathBuf}; use std::str::FromStr; +use tokio::process::Command; pub use omicron_common::zpool_name::ZpoolName; @@ -203,29 +204,30 @@ pub struct Zpool(()); /// Describes the API for interfacing with zpools /// /// This is a trait so that it can be faked out for tests. +#[async_trait::async_trait] pub trait Api: Send + Sync { - fn create( + async fn create( &self, name: &ZpoolName, vdev: &Utf8Path, ) -> Result<(), CreateError> { - let mut cmd = std::process::Command::new(PFEXEC); + let mut cmd = Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).args(["create", "-o", "ashift=12"]); cmd.arg(&name.to_string()); cmd.arg(vdev); - execute(&mut cmd).map_err(Error::from)?; + execute_async(&mut cmd).await.map_err(Error::from)?; // Ensure that this zpool has the encryption feature enabled - let mut cmd = std::process::Command::new(PFEXEC); + let mut cmd = Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL) .arg("set") .arg("feature@encryption=enabled") .arg(&name.to_string()); - execute(&mut cmd).map_err(Error::from)?; + execute_async(&mut cmd).await.map_err(Error::from)?; Ok(()) } @@ -238,23 +240,23 @@ impl Zpool { Self(()) } - pub fn destroy(name: &ZpoolName) -> Result<(), DestroyError> { - let mut cmd = std::process::Command::new(PFEXEC); + pub async fn destroy(name: &ZpoolName) -> Result<(), DestroyError> { + let mut cmd = Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).arg("destroy"); cmd.arg(&name.to_string()); - execute(&mut cmd).map_err(Error::from)?; + execute_async(&mut cmd).await.map_err(Error::from)?; Ok(()) } - pub fn import(name: &ZpoolName) -> Result<(), Error> { - let mut cmd = std::process::Command::new(PFEXEC); + pub async fn import(name: &ZpoolName) -> Result<(), Error> { + let mut cmd = Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).arg("import").arg("-f"); cmd.arg(&name.to_string()); - match execute(&mut cmd) { + match execute_async(&mut cmd).await { Ok(_) => Ok(()), Err(ExecutionError::CommandFailure(err_info)) => { // I'd really prefer to match on a specific error code, but the @@ -272,34 +274,34 @@ impl Zpool { } } - pub fn export(name: &ZpoolName) -> Result<(), Error> { - let mut cmd = std::process::Command::new(PFEXEC); + pub async fn export(name: &ZpoolName) -> Result<(), Error> { + let mut cmd = Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).arg("export").arg(&name.to_string()); - execute(&mut cmd)?; + execute_async(&mut cmd).await?; Ok(()) } /// `zpool set failmode=continue ` - pub fn set_failmode_continue(name: &ZpoolName) -> Result<(), Error> { - let mut cmd = std::process::Command::new(PFEXEC); + pub async fn set_failmode_continue(name: &ZpoolName) -> Result<(), Error> { + let mut cmd = Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL) .arg("set") .arg("failmode=continue") .arg(&name.to_string()); - 
execute(&mut cmd)?; + execute_async(&mut cmd).await?; Ok(()) } - pub fn list() -> Result, ListError> { - let mut command = std::process::Command::new(ZPOOL); + pub async fn list() -> Result, ListError> { + let mut command = Command::new(ZPOOL); let cmd = command.args(&["list", "-Hpo", "name"]); - let output = execute(cmd).map_err(Error::from)?; + let output = execute_async(cmd).await.map_err(Error::from)?; let stdout = String::from_utf8_lossy(&output.stdout); let zpool = stdout .lines() @@ -309,8 +311,8 @@ impl Zpool { } #[cfg_attr(test, allow(dead_code))] - pub fn get_info(name: &str) -> Result { - let mut command = std::process::Command::new(ZPOOL); + pub async fn get_info(name: &str) -> Result { + let mut command = Command::new(ZPOOL); let cmd = command.args(&[ "list", "-Hpo", @@ -318,7 +320,7 @@ impl Zpool { name, ]); - let output = execute(cmd).map_err(|err| GetInfoError { + let output = execute_async(cmd).await.map_err(|err| GetInfoError { name: name.to_string(), err: err.into(), })?; diff --git a/installinator/src/bootstrap.rs b/installinator/src/bootstrap.rs index b9e58fbcdac..9c592e10c8b 100644 --- a/installinator/src/bootstrap.rs +++ b/installinator/src/bootstrap.rs @@ -45,6 +45,7 @@ pub(crate) async fn bootstrap_sled( let mg_addr_objs = underlay::ensure_links_have_global_zone_link_local_v6_addresses(&links) + .await .context("failed to create address objects for maghemite")?; info!(log, "Starting mg-ddm service"); @@ -78,6 +79,7 @@ pub(crate) async fn bootstrap_sled( ip, "bootstrap6", ) + .await .context("failed to create v6 address for bootstrap etherstub vnic")?; // Spawn a background task to notify our local ddmd of our bootstrap address diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 96b7c0aa49e..84b88a5cb8a 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -750,7 +750,7 @@ impl ControlPlaneZoneWriteContext<'_> { std::mem::drop(output_directory); if let Some(zpool) = zpool { - Zpool::export(zpool)?; + Zpool::export(zpool).await?; } StepSuccess::new(()).into() diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 52ceb7a99ab..012555beb3c 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -597,8 +597,8 @@ async fn uninstall_all_omicron_zones() -> Result<()> { Ok(()) } -fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { - let datasets = match zfs::get_all_omicron_datasets_for_delete() { +async fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { + let datasets = match zfs::get_all_omicron_datasets_for_delete().await { Err(e) => { warn!(config.log(), "Failed to get omicron datasets: {}", e); return Err(e); @@ -616,7 +616,7 @@ fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { ))?; for dataset in &datasets { info!(config.log(), "Deleting dataset: {dataset}"); - zfs::Zfs::destroy_dataset(dataset)?; + zfs::Zfs::destroy_dataset(dataset).await?; } Ok(()) @@ -702,7 +702,7 @@ async fn do_deactivate(config: &Config) -> Result<()> { async fn do_uninstall(config: &Config) -> Result<()> { do_deactivate(config).await?; info!(config.log(), "Removing datasets"); - uninstall_all_omicron_datasets(config)?; + uninstall_all_omicron_datasets(config).await?; Ok(()) } diff --git a/sled-agent/config-reconciler/src/dataset_serialization_task.rs b/sled-agent/config-reconciler/src/dataset_serialization_task.rs index 096f7f66985..982a359fcfb 100644 --- a/sled-agent/config-reconciler/src/dataset_serialization_task.rs +++ 
b/sled-agent/config-reconciler/src/dataset_serialization_task.rs @@ -930,41 +930,12 @@ impl ZfsImpl for RealZfs { &self, args: DatasetEnsureArgs<'_>, ) -> Result<(), DatasetEnsureError> { - // Unpack `args` so we can clone `name` so we can move it into the - // closure below so that we can safely use `spawn_blocking`. We could - // also consider changing `DatasetEnsureArgs` to hold a `String` instead - // of a `&str`... - let DatasetEnsureArgs { - name, - mountpoint, - can_mount, - zoned, - encryption_details, - size_details, - id, - additional_options, - } = args; - let name = name.to_owned(); - let full_name = name.clone(); - - tokio::task::spawn_blocking(move || { - let args = DatasetEnsureArgs { - name: &name, - mountpoint, - can_mount, - zoned, - encryption_details, - size_details, - id, - additional_options, - }; - Zfs::ensure_dataset(args) - }) - .await - .expect("blocking closure did not panic") - .map_err(|err| DatasetEnsureError::EnsureFailed { - name: full_name, - err, + let full_name = args.name; + Zfs::ensure_dataset(args).await.map_err(|err| { + DatasetEnsureError::EnsureFailed { + name: full_name.to_string(), + err, + } }) } @@ -974,21 +945,16 @@ impl ZfsImpl for RealZfs { mountpoint: Mountpoint, ) -> Result<(), NestedDatasetMountError> { let name_cloned = name.clone(); - tokio::task::spawn_blocking(move || { - Zfs::ensure_dataset_mounted_and_exists(&name_cloned, &mountpoint) - }) - .await - .expect("blocking closure did not panic") - .map_err(|err| NestedDatasetMountError::MountFailed { name, err }) + Zfs::ensure_dataset_mounted_and_exists(&name_cloned, &mountpoint) + .await + .map_err(|err| NestedDatasetMountError::MountFailed { name, err }) } async fn destroy_dataset( &self, name: String, ) -> Result<(), DestroyDatasetError> { - tokio::task::spawn_blocking(move || Zfs::destroy_dataset(&name)) - .await - .expect("blocking closure did not panic") + Zfs::destroy_dataset(&name).await } async fn get_dataset_properties( @@ -997,11 +963,7 @@ impl ZfsImpl for RealZfs { which: WhichDatasets, ) -> anyhow::Result> { let datasets = datasets.to_vec(); - tokio::task::spawn_blocking(move || { - Zfs::get_dataset_properties(&datasets, which) - }) - .await - .expect("blocking closure did not panic") + Zfs::get_dataset_properties(&datasets, which).await } } diff --git a/sled-agent/config-reconciler/src/dump_setup.rs b/sled-agent/config-reconciler/src/dump_setup.rs index 7210d4168a8..6e3bd8fc26c 100644 --- a/sled-agent/config-reconciler/src/dump_setup.rs +++ b/sled-agent/config-reconciler/src/dump_setup.rs @@ -272,6 +272,7 @@ impl DumpSetup { let name = disk.zpool_name(); if let Ok(info) = illumos_utils::zpool::Zpool::get_info(&name.to_string()) + .await { if info.health() == ZpoolHealth::Online { m2_core_datasets.push(CoreZpool { @@ -290,6 +291,7 @@ impl DumpSetup { let name = disk.zpool_name(); if let Ok(info) = illumos_utils::zpool::Zpool::get_info(&name.to_string()) + .await { if info.health() == ZpoolHealth::Online { u2_debug_datasets.push(DebugZpool { diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index b6c5e5ce6bd..ac6e625512a 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -118,7 +118,7 @@ const BACKINGFS: [BackingFs; BACKINGFS_COUNT] = /// Ensure that the backing filesystems are mounted. /// If the underlying dataset for a backing fs does not exist on the specified /// boot disk then it will be created. 
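// (Mounting here relies on `zfs mount -O`, the overlay mount performed by
// `Zfs::mount_overlay_dataset`, which permits mounting a dataset on top of
// a non-empty directory such as one on the ramdisk root.)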
-pub(crate) fn ensure_backing_fs( +pub(crate) async fn ensure_backing_fs( log: &slog::Logger, boot_zpool_name: &illumos_utils::zpool::ZpoolName, ) -> Result<(), BackingFsError> { @@ -153,7 +153,8 @@ pub(crate) fn ensure_backing_fs( size_details, id: None, additional_options: None, - })?; + }) + .await?; // Check if a ZFS filesystem is already mounted on bfs.mountpoint by // retrieving the ZFS `mountpoint` property and comparing it. This @@ -164,6 +165,7 @@ pub(crate) fn ensure_backing_fs( // filesystem mounted there - most likely we are running with a non-ZFS // root, such as when net booted during CI. if Zfs::get_value(&bfs.mountpoint, "mountpoint") + .await .unwrap_or("not-zfs".to_string()) == bfs.mountpoint { @@ -182,7 +184,7 @@ pub(crate) fn ensure_backing_fs( info!(log, "Mounting {} on {}", dataset, mountpoint); - Zfs::mount_overlay_dataset(&dataset)?; + Zfs::mount_overlay_dataset(&dataset).await?; if let Some(subdirs) = bfs.subdirs { for dir in subdirs { diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 7a56c187e4e..d5b7553d54b 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -85,7 +85,7 @@ impl BootstrapAgentStartup { // Before we create the switch zone, we need to ensure that the // necessary ZFS and Zone resources are ready. All other zones // are created on U.2 drives. - ensure_zfs_ramdisk_dataset()?; + ensure_zfs_ramdisk_dataset().await?; Ok::<_, StartError>((config, log, startup_networking)) }) @@ -279,7 +279,7 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> { }) } -fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { +async fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { Zfs::ensure_dataset(zfs::DatasetEnsureArgs { name: zfs::ZONE_ZFS_RAMDISK_DATASET, mountpoint: zfs::Mountpoint(Utf8PathBuf::from( @@ -292,6 +292,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { id: None, additional_options: None, }) + .await .map_err(StartError::EnsureZfsRamdiskDataset) } @@ -372,7 +373,8 @@ impl BootstrapNetworking { bootstrap_etherstub_vnic.clone(), global_zone_bootstrap_ip, "bootstrap6", - )?; + ) + .await?; let global_zone_bootstrap_link_local_address = Zones::get_address( None, @@ -380,7 +382,8 @@ impl BootstrapNetworking { // malformed, but we just got it from `Dladm`, so we know it's // valid. &AddrObject::link_local(&bootstrap_etherstub_vnic.0).unwrap(), - )?; + ) + .await?; // Convert the `IpNetwork` down to just the IP address. 
let global_zone_bootstrap_link_local_ip = diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 761a39ccbbf..288f35d2a33 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -686,10 +686,11 @@ impl Inner { log: &Logger, ) -> Result<(), BootstrapError> { let datasets = zfs::get_all_omicron_datasets_for_delete() + .await .map_err(BootstrapError::ZfsDatasetsList)?; for dataset in &datasets { info!(log, "Removing dataset: {dataset}"); - zfs::Zfs::destroy_dataset(dataset)?; + zfs::Zfs::destroy_dataset(dataset).await?; } Ok(()) diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index ce0c3b3f6fe..216961072e5 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -117,7 +117,8 @@ pub async fn spawn_all_longrunning_tasks( ) .await; - let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_manager); + let zone_bundler = + spawn_zone_bundler_tasks(log, &mut storage_manager).await; let zone_image_resolver = make_zone_image_resolver(log, &all_disks, &boot_zpool); @@ -237,13 +238,14 @@ async fn spawn_bootstore_tasks( } // `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely -fn spawn_zone_bundler_tasks( +async fn spawn_zone_bundler_tasks( log: &Logger, storage_handle: &mut StorageHandle, ) -> ZoneBundler { info!(log, "Starting ZoneBundler related tasks"); let log = log.new(o!("component" => "ZoneBundler")); ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) + .await } fn make_zone_image_resolver( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3b30746da4f..cd8e977dc23 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2529,6 +2529,7 @@ impl ServiceManager { *gz_address, &addr_name, ) + .await .map_err(|err| Error::GzAddress { message: format!( "Failed to create address {} for Internal DNS zone", @@ -3941,7 +3942,7 @@ impl ServiceManager { &internal_dns_addrobj_name(*gz_address_index), ) .expect("internal DNS address object name is well-formed"); - Zones::delete_address(None, &addrobj).map_err(|err| { + Zones::delete_address(None, &addrobj).await.map_err(|err| { Error::ZoneCleanup { zone_name: zone.zone_name(), err: Box::new(err), @@ -3979,6 +3980,7 @@ impl ServiceManager { &["zoned", "canmount", "encryption"], None, ) + .await .map_err(|err| Error::GetZfsValue { zone: zone.zone_name(), source: err, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 7071b1a1b4b..fa2aa963973 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -419,7 +419,8 @@ impl SledAgent { sled_diagnostics::LogsHandle::new( log.new(o!("component" => "sled-diagnostics-cleanup")), ) - .cleanup_snapshots(); + .cleanup_snapshots() + .await; let storage_manager = &long_running_task_handles.storage_manager; let boot_disk = storage_manager @@ -447,7 +448,7 @@ impl SledAgent { } info!(log, "Mounting backing filesystems"); - crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1)?; + crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1).await?; // TODO-correctness Bootstrap-agent already ensures the underlay // etherstub and etherstub VNIC exist on startup - could it pass them @@ -468,6 +469,7 @@ impl SledAgent { *sled_address.ip(), "sled6", ) + .await .map_err(|err| Error::SledSubnet { err })?; // Initialize the xde kernel driver with the underlay devices. 
@@ -1371,6 +1373,7 @@ impl SledAgent { for zpool in all_disks.all_u2_zpools() { let info = match illumos_utils::zpool::Zpool::get_info(&zpool.to_string()) + .await { Ok(info) => info, Err(err) => { diff --git a/sled-agent/src/support_bundle/logs.rs b/sled-agent/src/support_bundle/logs.rs index 268f2b64835..3467c8663d0 100644 --- a/sled-agent/src/support_bundle/logs.rs +++ b/sled-agent/src/support_bundle/logs.rs @@ -88,15 +88,14 @@ impl<'a> SupportBundleLogs<'a> { let log = self.log.clone(); let zone = zone.into(); - let zip_file = tokio::task::spawn_blocking(move || { + let zip_file = { let handle = sled_diagnostics::LogsHandle::new(log); - match handle.get_zone_logs(&zone, max_rotated, &mut tempfile) { + match handle.get_zone_logs(&zone, max_rotated, &mut tempfile).await + { Ok(_) => Ok(tempfile), Err(e) => Err(e), } - }) - .await - .map_err(Error::Join)? + } .map_err(Error::Logs)?; // Since we are using a tempfile and the file path has already been diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 9101e090725..4e3d781e146 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -135,7 +135,7 @@ pub trait LocalStorage: Sync { async fn dyn_datasets_config_list(&self) -> Result; /// Returns properties about a dataset - fn dyn_dataset_get( + async fn dyn_dataset_get( &self, dataset_name: &String, ) -> Result; @@ -180,7 +180,7 @@ impl LocalStorage for StorageHandle { self.datasets_config_list().await.map_err(|err| err.into()) } - fn dyn_dataset_get( + async fn dyn_dataset_get( &self, dataset_name: &String, ) -> Result { @@ -188,6 +188,7 @@ impl LocalStorage for StorageHandle { &[dataset_name.clone()], illumos_utils::zfs::WhichDatasets::SelfOnly, ) + .await .map_err(|err| Error::DatasetLookup(err))? .pop() else { // This should not be possible, unless the "zfs get" command is @@ -246,7 +247,7 @@ impl LocalStorage for crate::sim::Storage { self.lock().datasets_config_list().map_err(|err| err.into()) } - fn dyn_dataset_get( + async fn dyn_dataset_get( &self, dataset_name: &String, ) -> Result { @@ -457,7 +458,7 @@ impl<'a> SupportBundleManager<'a> { .ok_or_else(|| Error::DatasetNotFound)?; let dataset_props = - self.storage.dyn_dataset_get(&dataset.name.full_name())?; + self.storage.dyn_dataset_get(&dataset.name.full_name()).await?; if !dataset_props.mounted { return Err(Error::DatasetNotMounted { dataset: dataset.name.clone(), diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 982aaf5d719..3430344f0b7 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -67,54 +67,57 @@ const ZONE_BUNDLE_ZFS_PROPERTY_VALUE: &'static str = "true"; // This deletes any snapshots matching the names we expect to create ourselves // during bundling. #[cfg(not(test))] -fn initialize_zfs_resources(log: &Logger) -> Result<(), BundleError> { - let zb_snapshots = - Zfs::list_snapshots().unwrap().into_iter().filter(|snap| { - // Check for snapshots named how we expect to create them. - if snap.snap_name != ZONE_ROOT_SNAPSHOT_NAME - || !snap.snap_name.starts_with(ARCHIVE_SNAPSHOT_PREFIX) - { - return false; - } +async fn initialize_zfs_resources(log: &Logger) -> Result<(), BundleError> { + let mut zb_snapshots = Vec::new(); + for snap in Zfs::list_snapshots().await.unwrap().into_iter() { + // Check for snapshots named how we expect to create them. 
+        if snap.snap_name != ZONE_ROOT_SNAPSHOT_NAME
+            || !snap.snap_name.starts_with(ARCHIVE_SNAPSHOT_PREFIX)
+        {
+            continue;
+        }
 
-            // Additionally check for the zone-bundle-specific property.
-            //
-            // If we find a dataset that matches our names, but which _does not_
-            // have such a property (or has in invalid property), we'll log it
-            // but avoid deleting the snapshot.
-            let name = snap.to_string();
-            let Ok([value]) = Zfs::get_values(
-                &name,
-                &[ZONE_BUNDLE_ZFS_PROPERTY_NAME],
-                Some(illumos_utils::zfs::PropertySource::Local),
-            ) else {
-                warn!(
-                    log,
-                    "Found a ZFS snapshot with a name reserved for zone \
-                    bundling, but which does not have the zone-bundle-specific \
-                    property. Bailing out, rather than risking deletion of \
-                    user data.";
-                    "snap_name" => &name,
-                    "property" => ZONE_BUNDLE_ZFS_PROPERTY_NAME
-                );
-                return false;
-            };
-            if value != ZONE_BUNDLE_ZFS_PROPERTY_VALUE {
-                warn!(
-                    log,
-                    "Found a ZFS snapshot with a name reserved for zone \
-                    bundling, with an unexpected property value. \
-                    Bailing out, rather than risking deletion of user data.";
-                    "snap_name" => &name,
-                    "property" => ZONE_BUNDLE_ZFS_PROPERTY_NAME,
-                    "property_value" => value,
-                );
-                return false;
-            }
-            true
-        });
+        // Additionally check for the zone-bundle-specific property.
+        //
+        // If we find a dataset that matches our names, but which _does not_
+        // have such a property (or has an invalid property), we'll log it
+        // but avoid deleting the snapshot.
+        let name = snap.to_string();
+        let Ok([value]) = Zfs::get_values(
+            &name,
+            &[ZONE_BUNDLE_ZFS_PROPERTY_NAME],
+            Some(illumos_utils::zfs::PropertySource::Local),
+        )
+        .await
+        else {
+            warn!(
+                log,
+                "Found a ZFS snapshot with a name reserved for zone \
+                bundling, but which does not have the zone-bundle-specific \
+                property. Bailing out, rather than risking deletion of \
+                user data.";
+                "snap_name" => &name,
+                "property" => ZONE_BUNDLE_ZFS_PROPERTY_NAME
+            );
+            continue;
+        };
+        if value != ZONE_BUNDLE_ZFS_PROPERTY_VALUE {
+            warn!(
+                log,
+                "Found a ZFS snapshot with a name reserved for zone \
+                bundling, with an unexpected property value. \
+                Bailing out, rather than risking deletion of user data.";
+                "snap_name" => &name,
+                "property" => ZONE_BUNDLE_ZFS_PROPERTY_NAME,
+                "property_value" => value,
+            );
+            continue;
+        }
+        zb_snapshots.push(snap);
+    }
     for snapshot in zb_snapshots {
-        Zfs::destroy_snapshot(&snapshot.filesystem, &snapshot.snap_name)?;
+        Zfs::destroy_snapshot(&snapshot.filesystem, &snapshot.snap_name)
+            .await?;
         debug!(
             log,
             "destroyed pre-existing zone bundle snapshot";
@@ -231,7 +234,7 @@ impl ZoneBundler {
     /// This creates an object that manages zone bundles on the system. It can
     /// be used to create bundles from running zones, and runs a periodic task
     /// to clean them up to free up space.
-    pub fn new(
+    pub async fn new(
         log: Logger,
         storage_handle: StorageHandle,
         cleanup_context: CleanupContext,
@@ -248,6 +251,7 @@ impl ZoneBundler {
         // temporary directory.
         #[cfg(not(test))]
         initialize_zfs_resources(&log)
+            .await
             .expect("Failed to initialize existing ZFS resources");
         let notify_cleanup = Arc::new(Notify::new());
         let inner = Arc::new(Mutex::new(Inner {
@@ -636,7 +640,7 @@ fn insert_data(
 }
 
 // Create a read-only snapshot from an existing filesystem.
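// (Tagging each snapshot with the zone-bundle marker property, roughly the
// CLI equivalent of `zfs snapshot -o <property>=<value> <fs>@<snap>`, is
// what lets the cleanup pass above tell these snapshots apart from any
// user-created snapshots with similar names.)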
-fn create_snapshot( +async fn create_snapshot( log: &Logger, filesystem: &str, snap_name: &str, @@ -645,7 +649,8 @@ fn create_snapshot( filesystem, snap_name, &[(ZONE_BUNDLE_ZFS_PROPERTY_NAME, ZONE_BUNDLE_ZFS_PROPERTY_VALUE)], - )?; + ) + .await?; debug!( log, "created snapshot"; @@ -690,15 +695,15 @@ fn create_snapshot( // At this point, we operate entirely on those snapshots. We search for // "current" log files in the root snapshot, and archived log files in the // archive snapshots. -fn create_zfs_snapshots( +async fn create_zfs_snapshots( log: &Logger, zone: &RunningZone, extra_log_dirs: &[Utf8PathBuf], ) -> Result, BundleError> { // Snapshot the root filesystem. - let dataset = Zfs::get_dataset_name(zone.root().as_str())?; + let dataset = Zfs::get_dataset_name(zone.root().as_str()).await?; let root_snapshot = - create_snapshot(log, &dataset, ZONE_ROOT_SNAPSHOT_NAME)?; + create_snapshot(log, &dataset, ZONE_ROOT_SNAPSHOT_NAME).await?; let mut snapshots = vec![root_snapshot]; // Look at all the provided extra log directories, and take a snapshot for @@ -711,24 +716,24 @@ fn create_zfs_snapshots( match std::fs::metadata(&zone_dir) { Ok(d) => { if d.is_dir() { - let dataset = match Zfs::get_dataset_name(zone_dir.as_str()) - { - Ok(ds) => Utf8PathBuf::from(ds), - Err(e) => { - error!( - log, - "failed to list datasets, will \ - unwind any previously created snapshots"; - "error" => ?e, - ); - assert!( - maybe_err - .replace(BundleError::from(e)) - .is_none() - ); - break; - } - }; + let dataset = + match Zfs::get_dataset_name(zone_dir.as_str()).await { + Ok(ds) => Utf8PathBuf::from(ds), + Err(e) => { + error!( + log, + "failed to list datasets, will \ + unwind any previously created snapshots"; + "error" => ?e, + ); + assert!( + maybe_err + .replace(BundleError::from(e)) + .is_none() + ); + break; + } + }; // These datasets are named like `/...`. Since // we're snapshotting zero or more of them, we disambiguate @@ -739,7 +744,9 @@ fn create_zfs_snapshots( .expect("Zone archive datasets must be non-empty"); let snap_name = format!("{}{}", ARCHIVE_SNAPSHOT_PREFIX, pool_name); - match create_snapshot(log, dataset.as_str(), &snap_name) { + match create_snapshot(log, dataset.as_str(), &snap_name) + .await + { Ok(snapshot) => snapshots.push(snapshot), Err(e) => { error!( @@ -772,16 +779,18 @@ fn create_zfs_snapshots( } } if let Some(err) = maybe_err { - cleanup_zfs_snapshots(log, &snapshots); + cleanup_zfs_snapshots(log, &snapshots).await; return Err(err); }; Ok(snapshots) } // Destroy any created ZFS snapshots. -fn cleanup_zfs_snapshots(log: &Logger, snapshots: &[Snapshot]) { +async fn cleanup_zfs_snapshots(log: &Logger, snapshots: &[Snapshot]) { for snapshot in snapshots.iter() { - match Zfs::destroy_snapshot(&snapshot.filesystem, &snapshot.snap_name) { + match Zfs::destroy_snapshot(&snapshot.filesystem, &snapshot.snap_name) + .await + { Ok(_) => debug!( log, "destroyed zone bundle ZFS snapshot"; @@ -820,7 +829,7 @@ async fn find_service_log_files( // additional directories, most notably `/root`. So the _cloned_ // log file will live at // `//crypt/zone/.zfs/snapshot//root/var/svc/log/...`. - let mut current_log_file = snapshots[0].full_path()?; + let mut current_log_file = snapshots[0].full_path().await?; current_log_file.push(RunningZone::ROOT_FS_PATH); current_log_file.push(svc.log_file.as_str().trim_start_matches('/')); let log_dir = @@ -852,18 +861,17 @@ async fn find_service_log_files( // archiving to one location, but move to another if a quota is hit. 
We'll // iterate over all the extra log directories and try to find any log files // in those filesystem snapshots. - let snapped_extra_log_dirs = snapshots - .iter() - .skip(1) - .flat_map(|snapshot| { - extra_log_dirs.iter().map(|d| { - // Join the snapshot path with both the log directory and the - // zone name, to arrive at something like: - // /path/to/dataset/.zfs/snapshot//path/to/extra/ - snapshot.full_path().map(|p| p.join(d).join(zone_name)) - }) - }) - .collect::, _>>()?; + let mut snapped_extra_log_dirs = BTreeSet::new(); + + for snapshot in snapshots.iter().skip(1) { + for d in extra_log_dirs { + // Join the snapshot path with both the log directory and the + // zone name, to arrive at something like: + // /path/to/dataset/.zfs/snapshot//path/to/extra/ + let p = snapshot.full_path().await?; + snapped_extra_log_dirs.insert(p.join(d).join(zone_name)); + } + } debug!( log, "looking for extra log files in filesystem snapshots"; @@ -1032,7 +1040,7 @@ async fn create( // filesystems, and insert those (now-static) files into the zone-bundle // tarballs. let snapshots = - match create_zfs_snapshots(log, zone, &context.extra_log_dirs) { + match create_zfs_snapshots(log, zone, &context.extra_log_dirs).await { Ok(snapshots) => snapshots, Err(e) => { error!( @@ -1106,7 +1114,7 @@ async fn create( "zone" => zone.name(), "error" => ?e, ); - cleanup_zfs_snapshots(&log, &snapshots); + cleanup_zfs_snapshots(&log, &snapshots).await; return Err(e); } }; @@ -1137,7 +1145,7 @@ async fn create( // Finish writing out the tarball itself. if let Err(e) = builder.into_inner().context("Failed to build bundle") { - cleanup_zfs_snapshots(&log, &snapshots); + cleanup_zfs_snapshots(&log, &snapshots).await; return Err(BundleError::from(e)); } @@ -1160,7 +1168,7 @@ async fn create( break; } } - cleanup_zfs_snapshots(&log, &snapshots); + cleanup_zfs_snapshots(&log, &snapshots).await; if let Some(err) = copy_err { return Err(err); } diff --git a/sled-diagnostics/src/logs.rs b/sled-diagnostics/src/logs.rs index d046cb66a8f..a4c47d98c7d 100644 --- a/sled-diagnostics/src/logs.rs +++ b/sled-diagnostics/src/logs.rs @@ -75,7 +75,10 @@ struct DiagnosticsSnapshot { impl DiagnosticsSnapshot { /// Create a snapshot for a ZFS filesystem - fn create(logger: &Logger, filesystem: &str) -> Result { + async fn create( + logger: &Logger, + filesystem: &str, + ) -> Result { let snap_name = format!( "{SLED_DIAGNOSTICS_SNAPSHOT_PREFIX}{}", thread_rng() @@ -89,7 +92,8 @@ impl DiagnosticsSnapshot { filesystem, &snap_name, diagnostics_zfs_properties(), - )?; + ) + .await?; let snapshot = Snapshot { filesystem: filesystem.to_string(), snap_name }; @@ -174,24 +178,32 @@ impl DiagnosticsSnapshot { impl Drop for DiagnosticsSnapshot { fn drop(&mut self) { - let _ = Zfs::destroy_snapshot( - &self.snapshot.filesystem, - &self.snapshot.snap_name, - ) - .inspect(|_| { - debug!( - self.log, - "destroyed sled-diagnostics ZFS snapshot"; - "snapshot" => %self.snapshot - ); - }) - .inspect_err(|e| { - error!( - self.log, - "failed to destroy sled-diagnostics ZFS snapshot"; - "snapshot" => %self.snapshot, - "error" => ?e, - ); + let DiagnosticsSnapshot { log, snapshot, .. 
} = self; + tokio::task::spawn({ + let log = log.clone(); + let snapshot = snapshot.clone(); + async move { + let _ = Zfs::destroy_snapshot( + &snapshot.filesystem, + &snapshot.snap_name, + ) + .await + .inspect(|_| { + debug!( + log, + "destroyed sled-diagnostics ZFS snapshot"; + "snapshot" => %snapshot + ); + }) + .inspect_err(|e| { + error!( + log, + "failed to destroy sled-diagnostics ZFS snapshot"; + "snapshot" => %snapshot, + "error" => ?e, + ); + }); + } }); } } @@ -210,16 +222,17 @@ impl LogSnapshots { /// For a given log file return the corresponding `DiagnosticsSnapshot` or /// create a new one if we have not yet created one for the underlying ZFS /// dataset backing this particular file. - fn get_or_create( + async fn get_or_create( &mut self, logger: &Logger, logfile: &Utf8Path, ) -> Result<&DiagnosticsSnapshot, LogError> { - let dataset = Zfs::get_dataset_name(logfile.as_str())?; + let dataset = Zfs::get_dataset_name(logfile.as_str()).await?; let snapshot = match self.inner.entry(dataset.clone()) { Entry::Occupied(occupied_entry) => occupied_entry.into_mut(), - Entry::Vacant(vacant_entry) => vacant_entry - .insert(DiagnosticsSnapshot::create(logger, dataset.as_str())?), + Entry::Vacant(vacant_entry) => vacant_entry.insert( + DiagnosticsSnapshot::create(logger, dataset.as_str()).await?, + ), }; Ok(snapshot) @@ -268,8 +281,8 @@ impl LogsHandle { /// /// NB: This is a best-effort attempt, any failure to list or delete /// snapshots will be logged. - pub fn cleanup_snapshots(&self) { - let snapshot_list = match Zfs::list_snapshots() { + pub async fn cleanup_snapshots(&self) { + let snapshot_list = match Zfs::list_snapshots().await { Ok(snapshots) => snapshots, Err(e) => { error!( @@ -282,9 +295,10 @@ impl LogsHandle { } }; - let diagnostic_snapshots = snapshot_list.into_iter().filter(|snap| { + let mut diagnostic_snapshots = Vec::new(); + for snap in snapshot_list { if !snap.snap_name.starts_with(SLED_DIAGNOSTICS_SNAPSHOT_PREFIX) { - return false; + continue; } // Additionally check for the sled-diagnostics property. @@ -297,7 +311,9 @@ impl LogsHandle { &name, &[SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME], Some(illumos_utils::zfs::PropertySource::Local), - ) { + ) + .await + { Ok([value]) => value, Err(e) => { error!( @@ -309,7 +325,7 @@ impl LogsHandle { "snap_name" => &name, "property" => SLED_DIAGNOSTICS_ZFS_PROPERTY_VALUE ); - return false; + continue; } }; @@ -323,16 +339,18 @@ impl LogsHandle { "property" => SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME, "property_value" => value, ); - return false; + continue; } - true - }); + diagnostic_snapshots.push(snap); + } for snapshot in diagnostic_snapshots { match Zfs::destroy_snapshot( &snapshot.filesystem, &snapshot.snap_name, - ) { + ) + .await + { Ok(_) => debug!( self.log, "destroyed pre-existing sled-diagnostics snapshot"; @@ -354,13 +372,13 @@ impl LogsHandle { .map_err(|e| LogError::OxLog(e)) } - fn find_log_in_snapshot( + async fn find_log_in_snapshot( &self, log_snapshots: &mut LogSnapshots, logfile: &Utf8Path, ) -> Result { let diagnostics_snapshot = - log_snapshots.get_or_create(&self.log, logfile)?; + log_snapshots.get_or_create(&self.log, logfile).await?; trace!( &self.log, @@ -399,7 +417,7 @@ impl LogsHandle { /// variants. /// - Write the logs contents into the provided zip file based on its zone, /// and service. 
- fn process_logs( + async fn process_logs( &self, service: &str, zip: &mut zip::ZipWriter, @@ -408,7 +426,7 @@ impl LogsHandle { logtype: LogType, ) -> Result<(), LogError> { let snapshot_logfile = - self.find_log_in_snapshot(log_snapshots, logfile)?; + self.find_log_in_snapshot(log_snapshots, logfile).await?; if logtype == LogType::Current { // Since we are processing the current log files in a zone we need @@ -497,7 +515,7 @@ impl LogsHandle { /// /// Note that this log retrieval will automatically take and cleanup /// necessary zfs snapshots along the way. - pub fn get_zone_logs( + pub async fn get_zone_logs( &self, zone: &str, max_rotated: usize, @@ -536,7 +554,8 @@ impl LogsHandle { &mut log_snapshots, ¤t.path, LogType::Current, - )?; + ) + .await?; } // - Grab all of the service's archived logs - @@ -568,7 +587,8 @@ impl LogsHandle { &mut log_snapshots, &file, LogType::Archive, - )?; + ) + .await?; } // - Grab all of the service's extra logs - @@ -587,7 +607,8 @@ impl LogsHandle { &mut log_snapshots, log, LogType::Extra, - )?; + ) + .await?; } // We clamp the number of rotated logs we grab to 5. @@ -599,7 +620,8 @@ impl LogsHandle { &mut log_snapshots, log, LogType::Extra, - )?; + ) + .await?; } } } @@ -894,7 +916,7 @@ mod illumos_tests { } /// Find all sled-diagnostics created snapshots - fn get_sled_diagnostics_snapshots(filesystem: &str) -> Vec { + async fn get_sled_diagnostics_snapshots(filesystem: &str) -> Vec { list_snapshots(filesystem) .into_iter() .filter(|snap| { @@ -908,6 +930,7 @@ mod illumos_tests { &[SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME], Some(illumos_utils::zfs::PropertySource::Local), ) + .await .unwrap() == [SLED_DIAGNOSTICS_ZFS_PROPERTY_VALUE] { @@ -943,19 +966,22 @@ mod illumos_tests { // Create a new snapshot log_snapshots.get_or_create(&log, &mountpoint).unwrap(); - let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem); + let snapshots = + get_sled_diagnostics_snapshots(zfs_filesystem).await; assert_eq!(snapshots.len(), 1, "single snapshot created"); // Creating a second snapshot from the same dataset doesn't create a // new snapshot log_snapshots.get_or_create(&log, &mountpoint).unwrap(); - let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem); + let snapshots = + get_sled_diagnostics_snapshots(zfs_filesystem).await; assert_eq!(snapshots.len(), 1, "duplicate snapshots not taken"); // Free all of the log_snapshots drop(log_snapshots); - let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem); + let snapshots = + get_sled_diagnostics_snapshots(zfs_filesystem).await; assert!(snapshots.is_empty(), "no snapshots left behind"); // Simulate a crash leaving behind stale snapshots @@ -965,13 +991,15 @@ mod illumos_tests { // Don't run the drop handler for any log_snapshots std::mem::forget(log_snapshots); - let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem); + let snapshots = + get_sled_diagnostics_snapshots(zfs_filesystem).await; assert_eq!(snapshots.len(), 1, "single snapshot created"); let handle = LogsHandle::new(log.clone()); handle.cleanup_snapshots(); - let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem); + let snapshots = + get_sled_diagnostics_snapshots(zfs_filesystem).await; assert!(snapshots.is_empty(), "all stale snapshots cleaned up"); } @@ -1046,6 +1074,7 @@ mod illumos_tests { .join(format!("var/svc/log/{}", logfile_to_data[0].0)), LogType::Current, ) + .await .unwrap(); zip.finish().unwrap(); @@ -1182,6 +1211,7 @@ mod illumos_tests { let snapshot_dendrite_log = loghandle .find_log_in_snapshot(&mut 
log_snapshots, &logfile) + .await .unwrap(); assert!( diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 586a25dc4e7..a2c28f8e8fe 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -286,7 +286,7 @@ pub struct PooledDisk { impl PooledDisk { /// Create a new PooledDisk - pub fn new( + pub async fn new( log: &Logger, unparsed_disk: UnparsedDisk, zpool_id: Option, @@ -310,9 +310,9 @@ impl PooledDisk { )?; let zpool_name = - ensure_zpool_exists(log, variant, &zpool_path, zpool_id)?; - ensure_zpool_imported(log, &zpool_name)?; - ensure_zpool_failmode_is_continue(log, &zpool_name)?; + ensure_zpool_exists(log, variant, &zpool_path, zpool_id).await?; + ensure_zpool_imported(log, &zpool_name).await?; + ensure_zpool_failmode_is_continue(log, &zpool_name).await?; Ok(Self { paths: unparsed_disk.paths, @@ -329,23 +329,23 @@ impl PooledDisk { /// Checks if the zpool exists, but makes no modifications, /// and does not attempt to import the zpool. -pub fn check_if_zpool_exists( +pub async fn check_if_zpool_exists( zpool_path: &Utf8Path, ) -> Result { - let zpool_name = match Fstyp::get_zpool(&zpool_path) { + let zpool_name = match Fstyp::get_zpool(&zpool_path).await { Ok(zpool_name) => zpool_name, Err(_) => return Err(PooledDiskError::ZpoolDoesNotExist), }; Ok(zpool_name) } -pub fn ensure_zpool_exists( +pub async fn ensure_zpool_exists( log: &Logger, variant: DiskVariant, zpool_path: &Utf8Path, zpool_id: Option, ) -> Result { - let zpool_name = match Fstyp::get_zpool(&zpool_path) { + let zpool_name = match Fstyp::get_zpool(&zpool_path).await { Ok(zpool_name) => { if let Some(expected) = zpool_id { info!(log, "Checking that UUID in storage matches request"; "expected" => ?expected); @@ -393,11 +393,11 @@ pub fn ensure_zpool_exists( DiskVariant::M2 => ZpoolName::new_internal(id), DiskVariant::U2 => ZpoolName::new_external(id), }; - Zpool::real_api().create(&zpool_name, &zpool_path)?; + Zpool::real_api().create(&zpool_name, &zpool_path).await?; zpool_name } }; - Zpool::import(&zpool_name).map_err(|e| { + Zpool::import(&zpool_name).await.map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); PooledDiskError::ZpoolImport(e) })?; @@ -405,18 +405,18 @@ pub fn ensure_zpool_exists( Ok(zpool_name) } -pub fn ensure_zpool_imported( +pub async fn ensure_zpool_imported( log: &Logger, zpool_name: &ZpoolName, ) -> Result<(), PooledDiskError> { - Zpool::import(&zpool_name).map_err(|e| { + Zpool::import(&zpool_name).await.map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); PooledDiskError::ZpoolImport(e) })?; Ok(()) } -pub fn ensure_zpool_failmode_is_continue( +pub async fn ensure_zpool_failmode_is_continue( log: &Logger, zpool_name: &ZpoolName, ) -> Result<(), PooledDiskError> { @@ -427,7 +427,7 @@ pub fn ensure_zpool_failmode_is_continue( // actively harmful to try to wait for it to come back; we'll be waiting // forever and get stuck. We'd rather get the errors so we can deal with // them ourselves. 
- Zpool::set_failmode_continue(&zpool_name).map_err(|e| { + Zpool::set_failmode_continue(&zpool_name).await.map_err(|e| { warn!( log, "Failed to set failmode=continue on zpool {zpool_name}: {e}" diff --git a/sled-hardware/src/underlay.rs b/sled-hardware/src/underlay.rs index d11db2bf51c..8b3d375176f 100644 --- a/sled-hardware/src/underlay.rs +++ b/sled-hardware/src/underlay.rs @@ -58,7 +58,7 @@ pub async fn find_nics( } } - ensure_links_have_global_zone_link_local_v6_addresses(&underlay_nics) + ensure_links_have_global_zone_link_local_v6_addresses(&underlay_nics).await } /// Return the Chelsio links on the system. @@ -86,14 +86,14 @@ pub async fn find_chelsio_links( /// Ensure each of the `PhysicalLink`s has a link local IPv6 address in the /// global zone. -pub fn ensure_links_have_global_zone_link_local_v6_addresses( +pub async fn ensure_links_have_global_zone_link_local_v6_addresses( links: &[PhysicalLink], ) -> Result, Error> { let mut addr_objs = Vec::with_capacity(links.len()); for link in links { let addrobj = AddrObject::link_local(&link.0)?; - Zones::ensure_has_link_local_v6_address(None, &addrobj)?; + Zones::ensure_has_link_local_v6_address(None, &addrobj).await?; addr_objs.push(addrobj); } diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index a1c026201f4..600f7e61b0f 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -216,7 +216,7 @@ pub(crate) async fn ensure_zpool_has_datasets( illumos_utils::zfs::Keypath::new(disk_identity, &mount_config.root); let epoch = if let Ok(epoch_str) = - Zfs::get_oxide_value(dataset, "epoch") + Zfs::get_oxide_value(dataset, "epoch").await { if let Ok(epoch) = epoch_str.parse::() { epoch @@ -274,6 +274,7 @@ pub(crate) async fn ensure_zpool_has_datasets( id: None, additional_options: None, }) + .await .inspect_err(|err| { warn!( log, @@ -307,13 +308,13 @@ pub(crate) async fn ensure_zpool_has_datasets( }); if dataset.wipe { - match Zfs::get_oxide_value(name, "agent") { + match Zfs::get_oxide_value(name, "agent").await { Ok(v) if &v == agent_local_value => { info!(log, "Skipping automatic wipe for dataset: {}", name); } Ok(_) | Err(_) => { info!(log, "Automatically destroying dataset: {}", name); - Zfs::destroy_dataset(name).or_else(|err| { + Zfs::destroy_dataset(name).await.or_else(|err| { // If we can't find the dataset, that's fine -- it might // not have been formatted yet. if matches!( @@ -345,6 +346,7 @@ pub(crate) async fn ensure_zpool_has_datasets( id: None, additional_options: None, }) + .await .inspect_err(|err| { warn!( log, @@ -355,12 +357,12 @@ pub(crate) async fn ensure_zpool_has_datasets( })?; if dataset.wipe { - Zfs::set_oxide_value(name, "agent", agent_local_value).map_err( - |err| DatasetError::CannotSetAgentProperty { + Zfs::set_oxide_value(name, "agent", agent_local_value) + .await + .map_err(|err| DatasetError::CannotSetAgentProperty { dataset: name.clone(), err: Box::new(err), - }, - )?; + })?; } } info!(log, "Finished ensuring zpool has datasets"; "zpool" => ?zpool_name, "disk_identity" => ?disk_identity); diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 6391152c198..66569f339b7 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -50,7 +50,7 @@ const SYNTHETIC_FIRMWARE_SLOT1: &str = "SYNTH1"; impl SyntheticDisk { // "Manages" a SyntheticDisk by ensuring that it has a Zpool and importing // it. If the zpool already exists, it is imported, but not re-created. 
- pub fn new( + pub async fn new( log: &Logger, mount_config: &MountConfig, raw: RawSyntheticDisk, @@ -75,12 +75,16 @@ impl SyntheticDisk { &path, zpool_id, ) + .await .unwrap(); - sled_hardware::disk::ensure_zpool_imported(log, &zpool_name).unwrap(); + sled_hardware::disk::ensure_zpool_imported(log, &zpool_name) + .await + .unwrap(); sled_hardware::disk::ensure_zpool_failmode_is_continue( log, &zpool_name, ) + .await .unwrap(); Self { raw, zpool_name } @@ -267,13 +271,12 @@ impl Disk { key_requester: Option<&StorageKeyRequester>, ) -> Result { let disk: Disk = match raw_disk { - RawDisk::Real(disk) => PooledDisk::new(log, disk, pool_id)?.into(), - RawDisk::Synthetic(disk) => Disk::Synthetic(SyntheticDisk::new( - log, - mount_config, - disk, - pool_id, - )), + RawDisk::Real(disk) => { + PooledDisk::new(log, disk, pool_id).await?.into() + } + RawDisk::Synthetic(disk) => Disk::Synthetic( + SyntheticDisk::new(log, mount_config, disk, pool_id).await, + ), }; dataset::ensure_zpool_has_datasets( log, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index aff69c4eb58..b2bb40f7b94 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -173,7 +173,8 @@ impl NestedDatasetLocation { Zfs::ensure_dataset_mounted_and_exists( &self.full_name(), &Mountpoint(mountpoint.clone()), - )?; + ) + .await?; return Ok(mountpoint); } @@ -995,6 +996,7 @@ impl StorageManager { .as_slice(), WhichDatasets::SelfOnly, ) + .await .unwrap_or_default() .into_iter() .map(|props| (props.name.clone(), props)) @@ -1178,6 +1180,7 @@ impl StorageManager { datasets_of_interest.as_slice(), WhichDatasets::SelfAndChildren, ) + .await .map_err(Error::Other) } @@ -1230,7 +1233,7 @@ impl StorageManager { return Err(anyhow!(msg).into()); } - Zfs::destroy_dataset(&full_name).map_err(|e| anyhow!(e))?; + Zfs::destroy_dataset(&full_name).await.map_err(|e| anyhow!(e))?; Ok(()) } @@ -1250,6 +1253,7 @@ impl StorageManager { &[full_name], WhichDatasets::SelfAndChildren, ) + .await .map_err(|e| { warn!( log, @@ -1531,7 +1535,8 @@ impl StorageManager { size_details, id: dataset_id, additional_options: None, - })?; + }) + .await?; Ok(()) } @@ -1567,7 +1572,8 @@ impl StorageManager { size_details, id: request.dataset_id, additional_options: None, - })?; + }) + .await?; Ok(()) } diff --git a/sled-storage/src/manager_test_harness.rs b/sled-storage/src/manager_test_harness.rs index 440427a6ce9..8826031c2aa 100644 --- a/sled-storage/src/manager_test_harness.rs +++ b/sled-storage/src/manager_test_harness.rs @@ -425,7 +425,7 @@ impl StorageManagerTestHarness { for (pool, _) in pools { eprintln!("Destroying pool: {pool:?}"); - if let Err(e) = illumos_utils::zpool::Zpool::destroy(&pool) { + if let Err(e) = illumos_utils::zpool::Zpool::destroy(&pool).await { eprintln!("Failed to destroy {pool:?}: {e:?}"); } } diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs index 13ffe48e452..be5376aceef 100644 --- a/sled-storage/src/pool.rs +++ b/sled-storage/src/pool.rs @@ -21,8 +21,11 @@ impl Pool { /// Queries for an existing Zpool by name. /// /// Returns Ok if the pool exists. 
- pub fn new(name: ZpoolName, parent: DiskIdentity) -> Result { - let info = Zpool::get_info(&name.to_string())?; + pub async fn new( + name: ZpoolName, + parent: DiskIdentity, + ) -> Result { + let info = Zpool::get_info(&name.to_string()).await?; Ok(Pool { name, info, parent }) } diff --git a/zone-setup/src/bin/zone-setup.rs b/zone-setup/src/bin/zone-setup.rs index d212f7cc7ca..f982f3c7afd 100644 --- a/zone-setup/src/bin/zone-setup.rs +++ b/zone-setup/src/bin/zone-setup.rs @@ -249,6 +249,7 @@ async fn switch_zone_setup( format!("invalid link name for addrobj: {link:?}") })?; Zones::ensure_has_link_local_v6_address(None, &addrobj) + .await .with_context(|| { format!("Could not ensure link local link {link:?}") })?; @@ -269,6 +270,7 @@ async fn switch_zone_setup( })?; let _ = Zones::create_address_internal(None, &addrobj, addrtype) + .await .with_context(|| { format!("Could not create bootstrap address {addrobj} {addrtype:?}") })?; @@ -283,6 +285,7 @@ async fn switch_zone_setup( gz_local_link_addr, &bootstrap_vnic, ) + .await .with_context(|| { format!( "Could not add bootstrap route via {bootstrap_vnic} \ @@ -342,6 +345,7 @@ async fn chrony_setup( "logadm config" => ?LOGADM_CONFIG_FILE, ); Svcadm::refresh_logadm_upgrade() + .await .context("failed to refresh logadm-upgrade")?; Ok(()) @@ -620,7 +624,7 @@ async fn common_nw_set_up( "Ensuring IP interface exists on datalink"; "datalink" => &datalink ); - Ipadm::ensure_ip_interface_exists(&datalink).with_context(|| { + Ipadm::ensure_ip_interface_exists(&datalink).await.with_context(|| { format!( "failed to ensure temporary IP interface on datalink {datalink}", ) @@ -631,17 +635,19 @@ async fn common_nw_set_up( "datalink" => &datalink, ); Ipadm::set_interface_mtu(&datalink) + .await .with_context(|| format!("failed to set MTU on datalink {datalink}"))?; info!( log, "Setting TCP recv_buf size to 1 MB"; ); - Ipadm::set_tcp_recv_buf().context("failed to set TCP recv_buf")?; + Ipadm::set_tcp_recv_buf().await.context("failed to set TCP recv_buf")?; info!( log, "Setting TCP congestion control algorithm to cubic"; ); Ipadm::set_tcp_congestion_control() + .await .context("failed to set TCP congestion_control")?; if static_addrs.is_empty() { @@ -660,6 +666,7 @@ async fn common_nw_set_up( "data link" => ?datalink, "static address" => %addr); Ipadm::create_static_and_autoconfigured_addrs(&datalink, &addr) + .await .with_context(|| { format!( "failed to ensure static address {addr} on \ @@ -736,8 +743,8 @@ async fn ensure_default_route_via_gateway_with_retries( log, "Ensuring there is a default route"; "gateway" => ?gateway, ); - Route::ensure_default_route_with_gateway(gateway).map_err(|err| { - match err { + Route::ensure_default_route_with_gateway(gateway).await.map_err( + |err| match err { ExecutionError::CommandFailure(ref e) => { if e.stdout.contains("Network is unreachable") { BackoffError::transient(err_with_context(err)) @@ -746,8 +753,8 @@ async fn ensure_default_route_via_gateway_with_retries( } } _ => BackoffError::permanent(err_with_context(err)), - } - }) + }, + ) }, |err, delay| { info!( @@ -771,7 +778,7 @@ async fn opte_interface_set_up( "Creating gateway on the OPTE IP interface if it doesn't already exist"; "OPTE interface" => ?interface ); - Ipadm::create_opte_gateway(&interface).with_context(|| { + Ipadm::create_opte_gateway(&interface).await.with_context(|| { format!("failed to create OPTE gateway on interface {interface}") })?; @@ -781,18 +788,21 @@ async fn opte_interface_set_up( "OPTE interface" => ?interface, "OPTE IP" => ?ip, ); 
- Route::ensure_opte_route(&gateway, &interface, &ip).with_context(|| { - format!( - "failed to ensure OPTE gateway route on interface {interface} \ + Route::ensure_opte_route(&gateway, &interface, &ip).await.with_context( + || { + format!( + "failed to ensure OPTE gateway route on interface {interface} \ with gateway {gateway} and IP {ip}", - ) - })?; + ) + }, + )?; info!( log, "Ensuring there is a default route"; "gateway" => ?gateway, ); Route::ensure_default_route_with_gateway(Gateway::Ipv4(gateway)) + .await .with_context(|| { format!("failed to ensure default route via gateway {gateway}") })?; From e546327e11fdae66ea213272061b136c42b22b6f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 12 May 2025 17:12:02 -0700 Subject: [PATCH 3/5] illumos --- illumos-utils/src/zfs.rs | 21 +++--- illumos-utils/src/zone.rs | 6 +- .../src/dataset_serialization_task.rs | 20 +++--- sled-agent/src/bin/zone-bundle.rs | 2 +- sled-agent/src/instance.rs | 14 ++-- sled-agent/src/services.rs | 3 +- sled-agent/src/zone_bundle.rs | 3 +- sled-diagnostics/src/logs.rs | 55 ++++++++-------- sled-hardware/src/disk.rs | 3 +- sled-hardware/src/illumos/partitions.rs | 65 ++++++++++++------- sled-hardware/src/non_illumos/mod.rs | 2 +- sled-storage/src/manager.rs | 4 ++ 12 files changed, 119 insertions(+), 79 deletions(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index e2c0f87a563..1795f5846bc 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -1418,25 +1418,25 @@ mod test { use super::*; #[cfg(target_os = "illumos")] - #[test] - fn directory_mutability() { + #[tokio::test] + async fn directory_mutability() { let dir = Utf8TempDir::new_in("/var/tmp").unwrap(); - let immutablity = is_directory_immutable(dir.path()).unwrap(); + let immutablity = is_directory_immutable(dir.path()).await.unwrap(); assert!( matches!(immutablity, Immutability::No), "new directory should be mutable, is: {:?}", immutablity ); - make_directory_immutable(dir.path()).unwrap(); - let immutablity = is_directory_immutable(dir.path()).unwrap(); + make_directory_immutable(dir.path()).await.unwrap(); + let immutablity = is_directory_immutable(dir.path()).await.unwrap(); assert!( matches!(immutablity, Immutability::Yes), "directory should be immutable" ); - make_directory_mutable(dir.path()).unwrap(); - let immutablity = is_directory_immutable(dir.path()).unwrap(); + make_directory_mutable(dir.path()).await.unwrap(); + let immutablity = is_directory_immutable(dir.path()).await.unwrap(); assert!( matches!(immutablity, Immutability::No), "directory should be mutable" @@ -1448,10 +1448,11 @@ mod test { // To minimize test setup, we rely on a zfs dataset named "rpool" existing, // but do not modify it within this test. #[cfg(target_os = "illumos")] - #[test] - fn get_values_of_rpool() { + #[tokio::test] + async fn get_values_of_rpool() { // If the rpool exists, it should have a name. let values = Zfs::get_values("rpool", &["name"; 1], None) + .await .expect("Failed to query rpool type"); assert_eq!(values[0], "rpool"); @@ -1459,12 +1460,14 @@ mod test { // want this to throw an error. let _values = Zfs::get_values("rpool", &["name"; 1], Some(PropertySource::Local)) + .await .expect("Failed to query rpool type"); // Also, the "all" property should not be queryable. It's normally fine // to pass this value, it just returns a variable number of properties, // which doesn't work with the current implementation's parsing. 
let err = Zfs::get_values("rpool", &["all"; 1], None) + .await .expect_err("Should not be able to query for 'all' property"); assert!( diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index c66c214a9ef..5dc270aaf20 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -964,11 +964,11 @@ mod tests { // This test validates that we correctly detect an attempt to delete an // address that does not exist and return `Ok(())`. #[cfg(target_os = "illumos")] - #[test] - fn delete_nonexistent_address() { + #[tokio::test] + async fn delete_nonexistent_address() { // We'll pick a name that hopefully no system actually has... let addr = AddrObject::new("nonsense", "shouldnotexist").unwrap(); - match Zones::delete_address(None, &addr) { + match Zones::delete_address(None, &addr).await { Ok(()) => (), Err(err) => panic!( "unexpected error deleting nonexistent address: {}", diff --git a/sled-agent/config-reconciler/src/dataset_serialization_task.rs b/sled-agent/config-reconciler/src/dataset_serialization_task.rs index 982a359fcfb..f578ae83d80 100644 --- a/sled-agent/config-reconciler/src/dataset_serialization_task.rs +++ b/sled-agent/config-reconciler/src/dataset_serialization_task.rs @@ -1927,7 +1927,11 @@ mod illumos_tests { "Attempting automated cleanup of {}", vdev_dir.path(), ); - self.cleanup(); + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(async move { + self.cleanup().await; + }); + }); } } } @@ -2024,7 +2028,7 @@ mod illumos_tests { zpool } - fn cleanup(&mut self) { + async fn cleanup(&mut self) { let Some(vdev_dir) = self.vdev_dir.take() else { // Already terminated return; @@ -2034,7 +2038,7 @@ mod illumos_tests { for pool in self.currently_managed_zpools_tx.borrow().iter() { eprintln!("destroying pool: {pool}"); - if let Err(err) = Zpool::destroy(&pool) { + if let Err(err) = Zpool::destroy(&pool).await { eprintln!( "failed to destroy {pool}: {}", InlineErrorChain::new(&err) @@ -2118,7 +2122,7 @@ mod illumos_tests { DatasetState::Ensured ); - harness.cleanup(); + harness.cleanup().await; logctx.cleanup_successful(); } @@ -2202,7 +2206,7 @@ mod illumos_tests { // ... and doing so mounts the dataset again. assert!(is_mounted(name).await); - harness.cleanup(); + harness.cleanup().await; logctx.cleanup_successful(); } @@ -2290,7 +2294,7 @@ mod illumos_tests { "err: {err}" ); - harness.cleanup(); + harness.cleanup().await; logctx.cleanup_successful(); } @@ -2344,7 +2348,7 @@ mod illumos_tests { assert_matches!(result.state, DatasetState::Ensured); } - harness.cleanup(); + harness.cleanup().await; logctx.cleanup_successful(); } @@ -2500,7 +2504,7 @@ mod illumos_tests { .expect("no error listing datasets"); assert_eq!(nested_datasets.len(), 0); - harness.cleanup(); + harness.cleanup().await; logctx.cleanup_successful(); } } diff --git a/sled-agent/src/bin/zone-bundle.rs b/sled-agent/src/bin/zone-bundle.rs index 938202da060..42316b4e705 100644 --- a/sled-agent/src/bin/zone-bundle.rs +++ b/sled-agent/src/bin/zone-bundle.rs @@ -216,7 +216,7 @@ async fn fetch_underlay_address() -> anyhow::Result { use illumos_utils::ipadm::Ipadm; use std::net::IpAddr; const EXPECTED_ADDR_OBJ: &str = "underlay0/sled6"; - match Ipadm::addrobj_addr(EXPECTED_ADDR_OBJ) { + match Ipadm::addrobj_addr(EXPECTED_ADDR_OBJ).await { // If we failed because there was no such interface, then fall back // to localhost. 
Ok(None) => Ok(Ipv6Addr::LOCALHOST), diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 1e7d9c07ea8..0888a1fadd4 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -2442,7 +2442,8 @@ mod tests { storage_handle, nexus_client, temp_dir, - ); + ) + .await; let metadata = InstanceMetadata { silo_id: Uuid::new_v4(), @@ -2520,7 +2521,7 @@ mod tests { // Helper alias for the receive-side of the metrics request queue. type MetricsRx = mpsc::Receiver; - fn fake_instance_manager_services( + async fn fake_instance_manager_services( log: &Logger, storage_handle: StorageHandle, nexus_client: NexusClient, @@ -2541,7 +2542,8 @@ mod tests { log.new(o!("component" => "ZoneBundler")), storage_handle.clone(), cleanup_context, - ); + ) + .await; let (metrics_queue, rx) = MetricsRequestQueue::for_test(); let services = InstanceManagerServices { @@ -2580,7 +2582,8 @@ mod tests { storage_harness.handle().clone(), nexus.nexus_client.clone(), &temp_guard.path().to_string(), - ); + ) + .await; let InstanceManagerServices { nexus_client, @@ -3143,7 +3146,8 @@ mod tests { storage_harness.handle().clone(), nexus_client, &temp_guard.path().to_string(), - ); + ) + .await; let propolis_id = PropolisUuid::new_v4(); let propolis_addr = SocketAddr::V4(SocketAddrV4::new( Ipv4Addr::new(127, 0, 0, 1), diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index cd8e977dc23..704b6937c5b 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -5366,7 +5366,8 @@ mod illumos_tests { log.clone(), storage_test_harness.handle().clone(), Default::default(), - ); + ) + .await; let mut storage_manager = storage_test_harness.handle().clone(); let all_disks = storage_manager.get_latest_disks().await; diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 3430344f0b7..24a13b8b2a2 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -1895,7 +1895,8 @@ mod illumos_tests { log, resource_wrapper.storage_test_harness.handle().clone(), context, - ); + ) + .await; Ok(CleanupTestContext { ctx: Arc::new(Mutex::new(CleanupTestContextInner { resource_wrapper, diff --git a/sled-diagnostics/src/logs.rs b/sled-diagnostics/src/logs.rs index a4c47d98c7d..2f91f4675d2 100644 --- a/sled-diagnostics/src/logs.rs +++ b/sled-diagnostics/src/logs.rs @@ -917,29 +917,29 @@ mod illumos_tests { /// Find all sled-diagnostics created snapshots async fn get_sled_diagnostics_snapshots(filesystem: &str) -> Vec { - list_snapshots(filesystem) - .into_iter() - .filter(|snap| { - if !snap.snap_name.starts_with(SLED_DIAGNOSTICS_SNAPSHOT_PREFIX) - { - return false; - } - let name = snap.to_string(); - if Zfs::get_values( - &name, - &[SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME], - Some(illumos_utils::zfs::PropertySource::Local), - ) - .await - .unwrap() - == [SLED_DIAGNOSTICS_ZFS_PROPERTY_VALUE] - { - return true; - }; + let mut snapshots = Vec::new(); - false - }) - .collect() + for snap in list_snapshots(filesystem).into_iter() { + if !snap.snap_name.starts_with(SLED_DIAGNOSTICS_SNAPSHOT_PREFIX) { + continue; + } + let name = snap.to_string(); + if Zfs::get_values( + &name, + &[SLED_DIAGNOSTICS_ZFS_PROPERTY_NAME], + Some(illumos_utils::zfs::PropertySource::Local), + ) + .await + .unwrap() + == [SLED_DIAGNOSTICS_ZFS_PROPERTY_VALUE] + { + continue; + }; + + snapshots.push(snap); + } + + snapshots } #[tokio::test] @@ -965,14 +965,14 @@ mod illumos_tests { let mut log_snapshots = LogSnapshots::new(); // Create a new snapshot - 
log_snapshots.get_or_create(&log, &mountpoint).unwrap(); + log_snapshots.get_or_create(&log, &mountpoint).await.unwrap(); let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem).await; assert_eq!(snapshots.len(), 1, "single snapshot created"); // Creating a second snapshot from the same dataset doesn't create a // new snapshot - log_snapshots.get_or_create(&log, &mountpoint).unwrap(); + log_snapshots.get_or_create(&log, &mountpoint).await.unwrap(); let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem).await; assert_eq!(snapshots.len(), 1, "duplicate snapshots not taken"); @@ -986,7 +986,7 @@ mod illumos_tests { // Simulate a crash leaving behind stale snapshots let mut log_snapshots = LogSnapshots::new(); - log_snapshots.get_or_create(&log, &mountpoint).unwrap(); + log_snapshots.get_or_create(&log, &mountpoint).await.unwrap(); // Don't run the drop handler for any log_snapshots std::mem::forget(log_snapshots); @@ -996,7 +996,7 @@ mod illumos_tests { assert_eq!(snapshots.len(), 1, "single snapshot created"); let handle = LogsHandle::new(log.clone()); - handle.cleanup_snapshots(); + handle.cleanup_snapshots().await; let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem).await; @@ -1139,7 +1139,7 @@ mod illumos_tests { let mut log_snapshots = LogSnapshots::new(); // Create a snapshot first - log_snapshots.get_or_create(&log, &logfile).unwrap(); + log_snapshots.get_or_create(&log, &logfile).await.unwrap(); // Change the data on disk by truncating the old file first let mut logfile_handle = @@ -1158,6 +1158,7 @@ mod illumos_tests { &logfile, LogType::Current, ) + .await .unwrap(); zip.finish().unwrap(); diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index a2c28f8e8fe..c5e7b466e8c 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -297,7 +297,8 @@ impl PooledDisk { // Ensure the GPT has the right format. This does not necessarily // mean that the partitions are populated with the data we need. let partitions = - ensure_partition_layout(&log, &paths, variant, identity, zpool_id)?; + ensure_partition_layout(&log, &paths, variant, identity, zpool_id) + .await?; // Find the path to the zpool which exists on this disk. // diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 9fcba7239d4..74b1646ea84 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -141,7 +141,7 @@ fn parse_partition_types( /// /// Returns a Vec of partitions on success. The index of the Vec is guaranteed /// to also be the index of the partition. -pub fn ensure_partition_layout( +pub async fn ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, @@ -156,11 +156,12 @@ pub fn ensure_partition_layout( identity, zpool_id, ) + .await } // Same as the [ensure_partition_layout], but with generic parameters // for access to external resources. -fn internal_ensure_partition_layout( +async fn internal_ensure_partition_layout( log: &Logger, zpool_api: &dyn illumos_utils::zpool::Api, paths: &DiskPaths, @@ -176,13 +177,19 @@ fn internal_ensure_partition_layout( let devfs_path_str = paths.devfs_path.as_str().to_string(); let log = log.new(slog::o!("path" => devfs_path_str)); - let gpt = match GPT::read(&path) { + // The GPT returned from "GPT::read" is "!Send", so it must not live across + // any await points. 
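+    //
+    // The usual shape of that workaround, sketched with this function's
+    // names (`some_async_work` is an illustrative stand-in, not a real
+    // call in this file):
+    //
+    //     let partitions = {
+    //         let gpt = GPT::read(&path)?; // !Send value lives only here
+    //         read_partitions(gpt, &path, variant)?
+    //     }; // `gpt` dropped, so the future can remain `Send`
+    //     some_async_work().await?;
+    //
+    // Below, the same effect is achieved by returning from the `Ok` arm
+    // before the first `.await`.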
+    let err = match GPT::read(&path) {
         Ok(gpt) => {
             // This should be the common steady-state case
             info!(log, "Disk already has a GPT");
-            gpt
+            return read_partitions(gpt, &path, variant);
         }
-        Err(libefi_illumos::Error::LabelNotFound) => {
+        Err(err) => err,
+    };
+
+    match err {
+        libefi_illumos::Error::LabelNotFound => {
             // Fresh U.2 disks are an example of devices where "we don't expect
             // a GPT to exist".
             info!(log, "Disk does not have a GPT");
@@ -214,7 +221,7 @@ fn internal_ensure_partition_layout(
 
                     // If a zpool does not already exist, create one.
                     let zpool_name = ZpoolName::new_external(zpool_id);
-                    zpool_api.create(&zpool_name, dev_path)?;
+                    zpool_api.create(&zpool_name, dev_path).await?;
                     return Ok(vec![Partition::ZfsPool]);
                 }
                 DiskVariant::M2 => {
@@ -226,13 +233,20 @@ fn internal_ensure_partition_layout(
             }
         }
     }
-        Err(err) => {
+        err => {
             return Err(PooledDiskError::Gpt {
                 path,
                 error: anyhow::Error::new(err),
             });
         }
     };
+}
+
+fn read_partitions(
+    gpt: GPT,
+    path: &Utf8Path,
+    variant: DiskVariant,
+) -> Result<Vec<Partition>, PooledDiskError> {
     let mut partitions: Vec<_> = gpt.partitions();
     match variant {
         DiskVariant::U2 => {
@@ -386,8 +400,8 @@ mod test {
         }
     }
 
-    #[test]
-    fn ensure_partition_layout_u2_no_format_without_dev_path() {
+    #[tokio::test]
+    async fn ensure_partition_layout_u2_no_format_without_dev_path() {
         let logctx = test_setup_log(
             "ensure_partition_layout_u2_no_format_without_dev_path",
         );
@@ -401,7 +415,8 @@ mod test {
             DiskVariant::U2,
             &mock_disk_identity(),
             None,
-        );
+        )
+        .await;
         match result {
             Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {}
             _ => panic!("Should have failed with a missing dev path error"),
@@ -410,8 +425,8 @@ mod test {
         logctx.cleanup_successful();
     }
 
-    #[test]
-    fn ensure_partition_layout_u2_format_with_dev_path() {
+    #[tokio::test]
+    async fn ensure_partition_layout_u2_format_with_dev_path() {
         let logctx =
             test_setup_log("ensure_partition_layout_u2_format_with_dev_path");
         let log = &logctx.log;
@@ -430,6 +445,7 @@ mod test {
             &mock_disk_identity(),
             Some(ZpoolUuid::new_v4()),
         )
+        .await
         .expect("Should have succeeded partitioning disk");
 
         assert_eq!(partitions.len(), U2_EXPECTED_PARTITION_COUNT);
@@ -438,8 +454,8 @@ mod test {
         logctx.cleanup_successful();
     }
 
-    #[test]
-    fn ensure_partition_layout_m2_cannot_format() {
+    #[tokio::test]
+    async fn ensure_partition_layout_m2_cannot_format() {
         let logctx = test_setup_log("ensure_partition_layout_m2_cannot_format");
         let log = &logctx.log.clone();
@@ -458,6 +474,7 @@ mod test {
                 &mock_disk_identity(),
                 None,
             )
+            .await
             .is_err()
         );
 
@@ -479,8 +496,8 @@ mod test {
         }
     }
 
-    #[test]
-    fn ensure_partition_layout_u2_with_expected_format() {
+    #[tokio::test]
+    async fn ensure_partition_layout_u2_with_expected_format() {
         let logctx =
             test_setup_log("ensure_partition_layout_u2_with_expected_format");
         let log = &logctx.log;
@@ -499,6 +516,7 @@ mod test {
             &mock_disk_identity(),
             None,
         )
+        .await
         .expect("Should be able to parse disk");
 
         assert_eq!(partitions.len(), U2_EXPECTED_PARTITION_COUNT);
@@ -524,8 +542,8 @@ mod test {
         }
     }
 
-    #[test]
-    fn ensure_partition_layout_m2_with_expected_format() {
+    #[tokio::test]
+    async fn ensure_partition_layout_m2_with_expected_format() {
         let logctx =
             test_setup_log("ensure_partition_layout_m2_with_expected_format");
         let log = &logctx.log;
@@ -544,6 +562,7 @@ mod test {
             &mock_disk_identity(),
             None,
         )
+        .await
         .expect("Should be able to parse disk");
 
         assert_eq!(partitions.len(), M2_EXPECTED_PARTITION_COUNT);
@@ -565,8 +584,8 @@ mod test {
         }
     }
 
-    #[test]
-    fn ensure_partition_layout_m2_fails_with_empty_gpt() {
+    #[tokio::test]
+    async fn ensure_partition_layout_m2_fails_with_empty_gpt() {
         let logctx =
             test_setup_log("ensure_partition_layout_m2_fails_with_empty_gpt");
         let log = &logctx.log;
@@ -586,6 +605,7 @@ mod test {
                 &mock_disk_identity(),
                 None,
             )
+            .await
             .expect_err("Should have failed parsing empty GPT"),
             PooledDiskError::BadPartitionLayout { .. }
         ));
@@ -593,8 +613,8 @@ mod test {
         logctx.cleanup_successful();
     }
 
-    #[test]
-    fn ensure_partition_layout_u2_fails_with_empty_gpt() {
+    #[tokio::test]
+    async fn ensure_partition_layout_u2_fails_with_empty_gpt() {
         let logctx =
             test_setup_log("ensure_partition_layout_u2_fails_with_empty_gpt");
         let log = &logctx.log;
@@ -614,6 +634,7 @@ mod test {
             &mock_disk_identity(),
             None,
         )
+        .await
         .expect_err("Should have failed parsing empty GPT"),
             PooledDiskError::BadPartitionLayout { .. }
         ));
diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs
index d03484bbfa1..c54afe87301 100644
--- a/sled-hardware/src/non_illumos/mod.rs
+++ b/sled-hardware/src/non_illumos/mod.rs
@@ -70,7 +70,7 @@ impl HardwareManager {
     }
 }
 
-pub fn ensure_partition_layout(
+pub async fn ensure_partition_layout(
     _log: &Logger,
     _paths: &DiskPaths,
     _variant: DiskVariant,
diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs
index b2bb40f7b94..19e1ecf0f6f 100644
--- a/sled-storage/src/manager.rs
+++ b/sled-storage/src/manager.rs
@@ -2063,6 +2063,7 @@ mod tests {
                 &[dataset_name.full_name()],
                 WhichDatasets::SelfOnly,
             )
+            .await
             .unwrap()[0];
 
         assert_eq!(observed_dataset.id, Some(dataset_id));
@@ -2077,6 +2078,7 @@ mod tests {
                 &[dataset_name.full_name()],
                 WhichDatasets::SelfOnly,
             )
+            .await
             .unwrap()[0];
 
         assert_eq!(observed_dataset.id, Some(dataset_id));
@@ -2114,6 +2116,7 @@ mod tests {
                 &[dataset_name.full_name()],
                 WhichDatasets::SelfOnly,
             )
+            .await
             .unwrap()[0];
 
         assert_eq!(observed_dataset.id, None);
@@ -2128,6 +2131,7 @@ mod tests {
                 &[dataset_name.full_name()],
                 WhichDatasets::SelfOnly,
             )
+            .await
             .unwrap()[0];
 
         assert_eq!(observed_dataset.id, Some(dataset_id));

From 3739d5e4f01ff80639f38cb4436336ecff76fdd2 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Tue, 13 May 2025 13:30:32 -0700
Subject: [PATCH 4/5] Better async drop
---
 .../src/dataset_serialization_task.rs |  22 ++-
 sled-diagnostics/src/logs.rs          | 132 +++++++++++++-----
 2 files changed, 112 insertions(+), 42 deletions(-)

diff --git a/sled-agent/config-reconciler/src/dataset_serialization_task.rs b/sled-agent/config-reconciler/src/dataset_serialization_task.rs
index f578ae83d80..f406f11a2ba 100644
--- a/sled-agent/config-reconciler/src/dataset_serialization_task.rs
+++ b/sled-agent/config-reconciler/src/dataset_serialization_task.rs
@@ -1927,6 +1927,12 @@ mod illumos_tests {
                     "Attempting automated cleanup of {}",
                     vdev_dir.path(),
                 );
+
+            // NOTE: There is an assertion in RealZfsTestHarness::new
+            // which should make this invocation safe.
+            //
+            // Blocking the calling thread like this isn't ideal, but
+            // the scope is limited to "tests which are failing anyway".
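+            //
+            // For reference, the shape of this "async cleanup from a sync
+            // Drop" workaround is roughly (sketch only; logging omitted):
+            //
+            //     fn drop(&mut self) {
+            //         tokio::task::block_in_place(|| {
+            //             tokio::runtime::Handle::current().block_on(
+            //                 async move { self.cleanup().await },
+            //             );
+            //         });
+            //     }
+            //
+            // tokio::task::block_in_place panics on a current_thread
+            // runtime, which is why the harness asserts a multi-thread
+            // flavor and the tests below use
+            // #[tokio::test(flavor = "multi_thread")].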
tokio::task::block_in_place(|| { tokio::runtime::Handle::current().block_on(async move { self.cleanup().await; @@ -1940,6 +1946,12 @@ mod illumos_tests { pub const DEFAULT_VDEV_SIZE: u64 = 64 * (1 << 20); fn new(log: Logger) -> Self { + assert_eq!( + tokio::runtime::Handle::current().runtime_flavor(), + tokio::runtime::RuntimeFlavor::MultiThread, + "RealZfsTestHarness requires a multi-threaded runtime to ensure deletion on drop" + ); + let vdev_dir = Utf8TempDir::new_in("/var/tmp").expect("created tempdir"); @@ -2078,7 +2090,7 @@ mod illumos_tests { } } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn ensure_datasets() { let logctx = dev::test_setup_log("ensure_datasets"); let mut harness = RealZfsTestHarness::new(logctx.log.clone()); @@ -2150,7 +2162,7 @@ mod illumos_tests { ); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn ensure_datasets_get_mounted() { let logctx = dev::test_setup_log("ensure_datasets_get_mounted"); let mut harness = RealZfsTestHarness::new(logctx.log.clone()); @@ -2210,7 +2222,7 @@ mod illumos_tests { logctx.cleanup_successful(); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn ensure_datasets_get_mounted_even_with_data() { let logctx = dev::test_setup_log("ensure_datasets_get_mounted_even_with_data"); @@ -2298,7 +2310,7 @@ mod illumos_tests { logctx.cleanup_successful(); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn ensure_many_datasets() { let logctx = dev::test_setup_log("ensure_many_datasets"); let mut harness = RealZfsTestHarness::new(logctx.log.clone()); @@ -2352,7 +2364,7 @@ mod illumos_tests { logctx.cleanup_successful(); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn nested_dataset() { let logctx = dev::test_setup_log("nested_dataset"); diff --git a/sled-diagnostics/src/logs.rs b/sled-diagnostics/src/logs.rs index 2f91f4675d2..1aed79fd987 100644 --- a/sled-diagnostics/src/logs.rs +++ b/sled-diagnostics/src/logs.rs @@ -12,11 +12,14 @@ use std::{ use camino::{Utf8Path, Utf8PathBuf}; use fs_err::File; use illumos_utils::zfs::{ - CreateSnapshotError, GetValueError, ListDatasetsError, Snapshot, Zfs, + CreateSnapshotError, DestroySnapshotError, GetValueError, + ListDatasetsError, Snapshot, Zfs, }; use oxlog::LogFile; +use oxlog::SvcLogs; use rand::{Rng, distributions::Alphanumeric, thread_rng}; use slog::Logger; +use std::collections::BTreeMap; use zip::{result::ZipError, write::FullFileOptions}; // The name of the snapshot created from the zone root filesystem. @@ -52,6 +55,9 @@ pub enum LogError { #[error(transparent)] Snapshot(#[from] CreateSnapshotError), + #[error(transparent)] + ZfsDestroySnapshot(#[from] DestroySnapshotError), + #[error(transparent)] ZfsGetValue(#[from] GetValueError), @@ -63,7 +69,7 @@ pub enum LogError { } /// A ZFS snapshot that is taken by the `sled-diagnostics` crate and handles -/// snapshot deletion on `Drop`. +/// snapshot deletion on `destroy` #[derive(Debug)] struct DiagnosticsSnapshot { log: Logger, @@ -71,6 +77,8 @@ struct DiagnosticsSnapshot { snapshot: Snapshot, /// The mountpoint on disk where this snapshot is mounted. 
snapshot_mountpoint: Utf8PathBuf, + /// Whether or not this snapshot has been destroyed + destroyed: bool, } impl DiagnosticsSnapshot { @@ -109,7 +117,12 @@ impl DiagnosticsSnapshot { logger, &snapshot, )?; - Ok(Self { log: logger.clone(), snapshot, snapshot_mountpoint }) + Ok(Self { + log: logger.clone(), + snapshot, + snapshot_mountpoint, + destroyed: false, + }) } /// Return the full path to the snapshot directory within the filesystem. @@ -174,41 +187,49 @@ impl DiagnosticsSnapshot { .join(format!(".zfs/snapshot/{}", snapshot.snap_name)) }) } + + async fn destroy(&mut self) -> Result<(), LogError> { + if !self.destroyed { + self.destroyed = true; + Zfs::destroy_snapshot( + &self.snapshot.filesystem, + &self.snapshot.snap_name, + ) + .await + .inspect(|_| { + debug!( + self.log, + "destroyed sled-diagnostics ZFS snapshot"; + "snapshot" => %self.snapshot + ); + }) + .inspect_err(|e| { + error!( + self.log, + "failed to destroy sled-diagnostics ZFS snapshot"; + "snapshot" => %self.snapshot, + "error" => ?e, + ); + })?; + } + Ok(()) + } } impl Drop for DiagnosticsSnapshot { fn drop(&mut self) { - let DiagnosticsSnapshot { log, snapshot, .. } = self; - tokio::task::spawn({ - let log = log.clone(); - let snapshot = snapshot.clone(); - async move { - let _ = Zfs::destroy_snapshot( - &snapshot.filesystem, - &snapshot.snap_name, - ) - .await - .inspect(|_| { - debug!( - log, - "destroyed sled-diagnostics ZFS snapshot"; - "snapshot" => %snapshot - ); - }) - .inspect_err(|e| { - error!( - log, - "failed to destroy sled-diagnostics ZFS snapshot"; - "snapshot" => %snapshot, - "error" => ?e, - ); - }); - } - }); + if !self.destroyed { + error!( + self.log, + "Snapshot dropped without being destroyed"; + "filesystem" => self.snapshot.filesystem.clone(), + "snap_name" => self.snapshot.snap_name.clone(), + ); + } } } -/// A utility type that keeps track fo `DiagnosticsSnapshot`s keyed off of a +/// A utility type that keeps track of `DiagnosticsSnapshot`s keyed off of a /// ZFS dataset name. struct LogSnapshots { inner: HashMap, @@ -237,6 +258,12 @@ impl LogSnapshots { Ok(snapshot) } + + async fn destroy(&mut self) { + for (_, mut snap) in self.inner.drain() { + let _ = snap.destroy().await; + } + } } /// The type of log file we are operating on. @@ -515,6 +542,10 @@ impl LogsHandle { /// /// Note that this log retrieval will automatically take and cleanup /// necessary zfs snapshots along the way. + /// + /// NOTE: Cancelling this function may result in leaked log snapshots, + /// which will not be removed until "LogsHandle::cleanup_snapshots" is + /// invoked. pub async fn get_zone_logs( &self, zone: &str, @@ -540,11 +571,37 @@ impl LogsHandle { }, ); - let mut zip = zip::ZipWriter::new(writer); + let zip = zip::ZipWriter::new(writer); - // Hold onto log snapshots so that they can be cleaned up on drop. + // Hold onto log snapshots so that they can be cleaned independent of + // "result". + // + // NOTE: This isn't cancel safe - if we drop this future after creating + // any number of logs, but before calling "log_snapshots.destroy()", + // we'll leak snapshots. 
let mut log_snapshots = LogSnapshots::new(); + let result = self + .get_zone_logs_inner( + zone_logs, + max_rotated, + zip, + &mut log_snapshots, + ) + .await; + + log_snapshots.destroy().await; + + result + } + + async fn get_zone_logs_inner( + &self, + zone_logs: BTreeMap, + max_rotated: usize, + mut zip: zip::ZipWriter, + mut log_snapshots: &mut LogSnapshots, + ) -> Result<(), LogError> { for (service, service_logs) in zone_logs { // - Grab all of the service's SMF logs - if let Some(current) = service_logs.current { @@ -933,10 +990,8 @@ mod illumos_tests { .unwrap() == [SLED_DIAGNOSTICS_ZFS_PROPERTY_VALUE] { - continue; + snapshots.push(snap); }; - - snapshots.push(snap); } snapshots @@ -978,7 +1033,7 @@ mod illumos_tests { assert_eq!(snapshots.len(), 1, "duplicate snapshots not taken"); // Free all of the log_snapshots - drop(log_snapshots); + log_snapshots.destroy().await; let snapshots = get_sled_diagnostics_snapshots(zfs_filesystem).await; @@ -1097,6 +1152,7 @@ mod illumos_tests { archive.by_name(&format!("mg-ddm/current/{name}")); assert!(file_in_zip.is_err(), "file should not be in zip"); } + log_snapshots.destroy().await; } // Cleanup @@ -1174,6 +1230,7 @@ mod illumos_tests { // Confirm we have the data in the snapshot and not the newly // written data. assert_eq!(contents.as_str(), data1, "log file data matches"); + log_snapshots.destroy().await; } // Cleanup @@ -1226,6 +1283,7 @@ mod illumos_tests { let mut contents = String::new(); snapshot_dendrite_log.read_to_string(&mut contents).await.unwrap(); assert_eq!(contents.as_str(), data, "log file data matches"); + log_snapshots.destroy().await; } // Cleanup From d776aa82391651a344c9b76fdee59a85452a7857 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 May 2025 14:40:15 -0700 Subject: [PATCH 5/5] feedback --- illumos-utils/src/zone.rs | 66 +++++++++++++++++++----------------- sled-diagnostics/src/logs.rs | 2 +- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 5dc270aaf20..6883126203a 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -557,46 +557,48 @@ impl Zones { /// /// This address may be optionally within a zone `zone`. /// If `None` is supplied, the address is queried from the Global Zone. - #[allow(clippy::needless_lifetimes)] - pub async fn ensure_address<'a>( - zone: Option<&'a str>, + pub async fn ensure_address( + zone: Option<&str>, addrobj: &AddrObject, addrtype: AddressRequest, ) -> Result { - #[allow(clippy::redundant_closure_call)] - async |zone, addrobj, addrtype| -> Result { - match Self::get_address_impl(zone, addrobj).await { - Ok(addr) => { - if let AddressRequest::Static(expected_addr) = addrtype { - // If the address is static, we need to validate that it - // matches the value we asked for. - if addr != expected_addr { - // If the address doesn't match, try removing the old - // value before using the new one. 
- Self::delete_address(zone, addrobj) - .await - .map_err(|e| anyhow!(e))?; - return Self::create_address( - zone, addrobj, addrtype, - ) + Self::ensure_address_inner(zone, addrobj, addrtype).await.map_err( + |err| EnsureAddressError { + zone: zone.unwrap_or("global").to_string(), + request: addrtype, + name: addrobj.clone(), + err, + }, + ) + } + + async fn ensure_address_inner( + zone: Option<&str>, + addrobj: &AddrObject, + addrtype: AddressRequest, + ) -> anyhow::Result { + match Self::get_address_impl(zone, addrobj).await { + Ok(addr) => { + if let AddressRequest::Static(expected_addr) = addrtype { + // If the address is static, we need to validate that it + // matches the value we asked for. + if addr != expected_addr { + // If the address doesn't match, try removing the old + // value before using the new one. + Self::delete_address(zone, addrobj) + .await + .map_err(|e| anyhow!(e))?; + return Self::create_address(zone, addrobj, addrtype) .await .map_err(|e| anyhow!(e)); - } } - Ok(addr) } - Err(_) => Self::create_address(zone, addrobj, addrtype) - .await - .map_err(|e| anyhow!(e)), + Ok(addr) } - }(zone, addrobj, addrtype) - .await - .map_err(|err| EnsureAddressError { - zone: zone.unwrap_or("global").to_string(), - request: addrtype, - name: addrobj.clone(), - err, - }) + Err(_) => Self::create_address(zone, addrobj, addrtype) + .await + .map_err(|e| anyhow!(e)), + } } /// Gets the IP address of an interface. diff --git a/sled-diagnostics/src/logs.rs b/sled-diagnostics/src/logs.rs index 1aed79fd987..5d165a333c6 100644 --- a/sled-diagnostics/src/logs.rs +++ b/sled-diagnostics/src/logs.rs @@ -190,7 +190,6 @@ impl DiagnosticsSnapshot { async fn destroy(&mut self) -> Result<(), LogError> { if !self.destroyed { - self.destroyed = true; Zfs::destroy_snapshot( &self.snapshot.filesystem, &self.snapshot.snap_name, @@ -211,6 +210,7 @@ impl DiagnosticsSnapshot { "error" => ?e, ); })?; + self.destroyed = true; } Ok(()) }
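
The `destroyed` flag handling above generalizes to any resource whose
teardown is async: expose an explicit `async fn destroy`, keep `Drop` as a
loud backstop that only logs, and flip the flag only after teardown has
succeeded so a failed attempt can be retried and still trips the warning.
A minimal self-contained sketch of that shape (`Resource` and `do_teardown`
are illustrative stand-ins for `DiagnosticsSnapshot` and
`Zfs::destroy_snapshot`, not code from this series):

    // Stand-in for the real async teardown call.
    async fn do_teardown(name: &str) -> Result<(), String> {
        println!("tearing down {name}");
        Ok(())
    }

    struct Resource {
        name: String,
        destroyed: bool,
    }

    impl Resource {
        async fn destroy(&mut self) -> Result<(), String> {
            if !self.destroyed {
                do_teardown(&self.name).await?;
                // Mark destroyed only after success, as in the hunk above.
                self.destroyed = true;
            }
            Ok(())
        }
    }

    impl Drop for Resource {
        fn drop(&mut self) {
            if !self.destroyed {
                eprintln!("{} dropped without destroy()", self.name);
            }
        }
    }

Callers invoke `resource.destroy().await` on every exit path; anything that
slips through shows up in the logs rather than leaking silently.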