diff --git a/Cargo.lock b/Cargo.lock index 303cc2cff3a..a6f1e172ce7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11762,6 +11762,7 @@ dependencies = [ "omicron-uuid-kinds", "omicron-workspace-hack", "proptest", + "schemars", "scopeguard", "serde", "serde_json", @@ -11826,6 +11827,7 @@ dependencies = [ "omicron-workspace-hack", "pretty_assertions", "serde_json", + "sled-agent-config-reconciler", "sled-storage", "slog", "slog-error-chain", diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index b27f3048439..5eb8c3ecafe 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -67,7 +67,6 @@ progenitor::generate_api!( OmicronPhysicalDiskConfig = omicron_common::disk::OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig = omicron_common::disk::OmicronPhysicalDisksConfig, OmicronSledConfig = nexus_sled_agent_shared::inventory::OmicronSledConfig, - OmicronSledConfigResult = nexus_sled_agent_shared::inventory::OmicronSledConfigResult, OmicronZoneConfig = nexus_sled_agent_shared::inventory::OmicronZoneConfig, OmicronZoneDataset = nexus_sled_agent_shared::inventory::OmicronZoneDataset, OmicronZoneImageSource = nexus_sled_agent_shared::inventory::OmicronZoneImageSource, diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index bf651cda923..223ca38dde9 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -7351,30 +7351,28 @@ fn inv_collection_print_sleds(collection: &Collection) { "LAST RECONCILED CONFIG", &last_reconciliation.last_reconciled_config, ); - let disk_errs = collect_config_reconciler_errors( - &last_reconciliation.external_disks, - ); - let dataset_errs = collect_config_reconciler_errors( - &last_reconciliation.datasets, - ); - let zone_errs = collect_config_reconciler_errors( - &last_reconciliation.zones, - ); - for (label, errs) in [ - ("disk", disk_errs), - ("dataset", dataset_errs), - ("zone", zone_errs), - ] { - if errs.is_empty() { - println!(" all {label}s reconciled successfully"); - } else { - println!( - " {} {label} reconciliation errors:", - errs.len() - ); - for err in errs { - println!(" {err}"); - } + } + let disk_errs = collect_config_reconciler_errors( + &last_reconciliation.external_disks, + ); + let dataset_errs = + collect_config_reconciler_errors(&last_reconciliation.datasets); + let zone_errs = + collect_config_reconciler_errors(&last_reconciliation.zones); + for (label, errs) in [ + ("disk", disk_errs), + ("dataset", dataset_errs), + ("zone", zone_errs), + ] { + if errs.is_empty() { + println!(" all {label}s reconciled successfully"); + } else { + println!( + " {} {label} reconciliation errors:", + errs.len() + ); + for err in errs { + println!(" {err}"); } } } diff --git a/dev-tools/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs index d02203e052b..fbd6260bd8f 100644 --- a/dev-tools/omdb/src/bin/omdb/sled_agent.rs +++ b/dev-tools/omdb/src/bin/omdb/sled_agent.rs @@ -34,14 +34,6 @@ enum SledAgentCommands { #[clap(subcommand)] Zones(ZoneCommands), - /// print information about zpools - #[clap(subcommand)] - Zpools(ZpoolCommands), - - /// print information about datasets - #[clap(subcommand)] - Datasets(DatasetCommands), - /// print information about the local bootstore node #[clap(subcommand)] Bootstore(BootstoreCommands), @@ -97,12 +89,6 @@ impl SledAgentArgs { SledAgentCommands::Zones(ZoneCommands::List) => { cmd_zones_list(&client).await } - SledAgentCommands::Zpools(ZpoolCommands::List) => { - 
cmd_zpools_list(&client).await - } - SledAgentCommands::Datasets(DatasetCommands::List) => { - cmd_datasets_list(&client).await - } SledAgentCommands::Bootstore(BootstoreCommands::Status) => { cmd_bootstore_status(&client).await } @@ -129,44 +115,6 @@ async fn cmd_zones_list( Ok(()) } -/// Runs `omdb sled-agent zpools list` -async fn cmd_zpools_list( - client: &sled_agent_client::Client, -) -> Result<(), anyhow::Error> { - let response = client.zpools_get().await.context("listing zpools")?; - let zpools = response.into_inner(); - - println!("zpools:"); - if zpools.is_empty() { - println!(" "); - } - for zpool in &zpools { - println!(" {:?}", zpool); - } - - Ok(()) -} - -/// Runs `omdb sled-agent datasets list` -async fn cmd_datasets_list( - client: &sled_agent_client::Client, -) -> Result<(), anyhow::Error> { - let response = client.datasets_get().await.context("listing datasets")?; - let response = response.into_inner(); - - println!("dataset configuration @ generation {}:", response.generation); - let datasets = response.datasets; - - if datasets.is_empty() { - println!(" "); - } - for dataset in &datasets { - println!(" {:?}", dataset); - } - - Ok(()) -} - /// Runs `omdb sled-agent bootstore status` async fn cmd_bootstore_status( client: &sled_agent_client::Client, diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 5ed7a606fda..c8441924d7f 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -107,7 +107,6 @@ async fn test_omdb_usage_errors() { &["nexus", "sleds"], &["sled-agent"], &["sled-agent", "zones"], - &["sled-agent", "zpools"], &["oximeter", "--help"], &["oxql", "--help"], // Mispelled argument diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 68ff3c59272..472e2b3cc9b 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -909,8 +909,6 @@ Usage: omdb sled-agent [OPTIONS] Commands: zones print information about zones - zpools print information about zpools - datasets print information about datasets bootstore print information about the local bootstore node help Print this message or the help of the given subcommand(s) @@ -949,32 +947,6 @@ Connection Options: --sled-agent-url URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] --dns-server [env: OMDB_DNS_SERVER=] -Safety Options: - -w, --destructive Allow potentially-destructive subcommands -============================================= -EXECUTING COMMAND: omdb ["sled-agent", "zpools"] -termination: Exited(2) ---------------------------------------------- -stdout: ---------------------------------------------- -stderr: -print information about zpools - -Usage: omdb sled-agent zpools [OPTIONS] - -Commands: - list Print list of all zpools managed by the sled agent - help Print this message or the help of the given subcommand(s) - -Options: - --log-level log level filter [env: LOG_LEVEL=] [default: warn] - --color Color output [default: auto] [possible values: auto, always, never] - -h, --help Print help - -Connection Options: - --sled-agent-url URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] - --dns-server [env: OMDB_DNS_SERVER=] - Safety Options: -w, --destructive Allow potentially-destructive subcommands ============================================= diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 740ba9cf36e..70a87cba41d 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ 
b/nexus-sled-agent-shared/src/inventory.rs @@ -12,16 +12,14 @@ use chrono::{DateTime, Utc}; use daft::Diffable; use id_map::IdMap; use id_map::IdMappable; +use omicron_common::disk::{DatasetKind, DatasetName}; use omicron_common::ledger::Ledgerable; use omicron_common::{ api::{ external::{ByteCount, Generation}, internal::shared::{NetworkInterface, SourceNatConfig}, }, - disk::{ - DatasetConfig, DatasetManagementStatus, DiskManagementStatus, - DiskVariant, OmicronPhysicalDiskConfig, - }, + disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, zpool_name::ZpoolName, }; @@ -132,6 +130,49 @@ pub struct ConfigReconcilerInventory { pub zones: BTreeMap, } +impl ConfigReconcilerInventory { + /// Iterate over all running zones as reported by the last reconciliation + /// result. + /// + /// This includes zones that are both present in `last_reconciled_config` + /// and whose status in `zones` indicates "successfully running". + pub fn running_omicron_zones( + &self, + ) -> impl Iterator { + self.zones.iter().filter_map(|(zone_id, result)| match result { + ConfigReconcilerInventoryResult::Ok => { + self.last_reconciled_config.zones.get(zone_id) + } + ConfigReconcilerInventoryResult::Err { .. } => None, + }) + } + + /// Given a sled config, produce a reconciler result that sled-agent could + /// have emitted if reconciliation succeeded. + /// + /// This method should only be used by tests and dev tools; real code should + /// look at the actual `last_reconciliation` value from the parent + /// [`Inventory`]. + pub fn debug_assume_success(config: OmicronSledConfig) -> Self { + let external_disks = config + .disks + .iter() + .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + let datasets = config + .datasets + .iter() + .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + let zones = config + .zones + .iter() + .map(|z| (z.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + Self { last_reconciled_config: config, external_disks, datasets, zones } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] #[serde(tag = "result", rename_all = "snake_case")] pub enum ConfigReconcilerInventoryResult { @@ -187,8 +228,6 @@ pub enum SledRole { } /// Describes the set of Reconfigurator-managed configuration elements of a sled -// TODO this struct should have a generation number; at the moment, each of -// the fields has a separete one internally. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] pub struct OmicronSledConfig { pub generation: Generation, @@ -223,14 +262,6 @@ impl Ledgerable for OmicronSledConfig { } } -/// Result of the currently-synchronous `omicron_config_put` endpoint. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"] -pub struct OmicronSledConfigResult { - pub disks: Vec, - pub datasets: Vec, -} - /// Describes the set of Omicron-managed zones running on a sled #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -297,6 +328,10 @@ impl OmicronZoneConfig { Some(self.id), ) } + + pub fn dataset_name(&self) -> Option { + self.zone_type.dataset_name() + } } /// Describes a persistent ZFS dataset associated with an Omicron zone @@ -583,6 +618,41 @@ impl OmicronZoneType { | OmicronZoneType::Oximeter { .. } => None, } } + + /// If this kind of zone has an associated dataset, return the dataset's + /// name. Otherwise, return `None`. 
+ pub fn dataset_name(&self) -> Option { + let (dataset, dataset_kind) = match self { + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + | OmicronZoneType::Nexus { .. } + | OmicronZoneType::Oximeter { .. } + | OmicronZoneType::CruciblePantry { .. } => None, + OmicronZoneType::Clickhouse { dataset, .. } => { + Some((dataset, DatasetKind::Clickhouse)) + } + OmicronZoneType::ClickhouseKeeper { dataset, .. } => { + Some((dataset, DatasetKind::ClickhouseKeeper)) + } + OmicronZoneType::ClickhouseServer { dataset, .. } => { + Some((dataset, DatasetKind::ClickhouseServer)) + } + OmicronZoneType::CockroachDb { dataset, .. } => { + Some((dataset, DatasetKind::Cockroach)) + } + OmicronZoneType::Crucible { dataset, .. } => { + Some((dataset, DatasetKind::Crucible)) + } + OmicronZoneType::ExternalDns { dataset, .. } => { + Some((dataset, DatasetKind::ExternalDns)) + } + OmicronZoneType::InternalDns { dataset, .. } => { + Some((dataset, DatasetKind::InternalDns)) + } + }?; + + Some(DatasetName::new(dataset.pool_name, dataset_kind)) + } } /// Like [`OmicronZoneType`], but without any associated data. diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index a7603195273..07af0890f46 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -16,7 +16,6 @@ use gateway_client::types::SpType; use gateway_types::rot::RotSlot; use nexus_sled_agent_shared::inventory::Baseboard; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; -use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; @@ -643,29 +642,9 @@ pub fn sled_agent( ledgered_sled_config: Option, ) -> Inventory { // Assume the `ledgered_sled_config` was reconciled successfully. 
- let last_reconciliation = ledgered_sled_config.clone().map(|config| { - let external_disks = config - .disks - .iter() - .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) - .collect(); - let datasets = config - .datasets - .iter() - .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) - .collect(); - let zones = config - .zones - .iter() - .map(|z| (z.id, ConfigReconcilerInventoryResult::Ok)) - .collect(); - ConfigReconcilerInventory { - last_reconciled_config: config, - external_disks, - datasets, - zones, - } - }); + let last_reconciliation = ledgered_sled_config + .clone() + .map(ConfigReconcilerInventory::debug_assume_success); let reconciler_status = if last_reconciliation.is_some() { ConfigReconcilerInventoryStatus::Idle { diff --git a/nexus/reconfigurator/execution/src/omicron_sled_config.rs b/nexus/reconfigurator/execution/src/omicron_sled_config.rs index bd08e96a859..70ca8493a4a 100644 --- a/nexus/reconfigurator/execution/src/omicron_sled_config.rs +++ b/nexus/reconfigurator/execution/src/omicron_sled_config.rs @@ -10,15 +10,13 @@ use anyhow::anyhow; use futures::StreamExt; use futures::stream; use nexus_db_queries::context::OpContext; -use nexus_sled_agent_shared::inventory::OmicronSledConfigResult; use nexus_types::deployment::BlueprintSledConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; -use slog::Logger; use slog::info; use slog::warn; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; -use update_engine::merge_anyhow_list; /// Idempotently ensure that the specified Omicron sled configs are deployed to /// the corresponding sleds @@ -63,13 +61,14 @@ pub(crate) async fn deploy_sled_configs( format!("Failed to put {config:#?} to sled {sled_id}") }); match result { + Ok(_) => None, Err(error) => { - warn!(log, "{error:#}"); + warn!( + log, "failed to put sled config"; + InlineErrorChain::new(error.as_ref()), + ); Some(error) } - Ok(result) => { - parse_config_result(result.into_inner(), &log).err() - } } }) .collect() @@ -78,69 +77,6 @@ pub(crate) async fn deploy_sled_configs( if errors.is_empty() { Ok(()) } else { Err(errors) } } -fn parse_config_result( - result: OmicronSledConfigResult, - log: &Logger, -) -> anyhow::Result<()> { - let (disk_errs, disk_successes): (Vec<_>, Vec<_>) = - result.disks.into_iter().partition(|status| status.err.is_some()); - - if !disk_errs.is_empty() { - warn!( - log, - "Failed to deploy disks for sled agent"; - "successfully configured disks" => disk_successes.len(), - "failed disk configurations" => disk_errs.len(), - ); - for err in &disk_errs { - warn!(log, "{err:?}"); - } - return Err(merge_anyhow_list(disk_errs.into_iter().map(|status| { - anyhow!( - "failed to deploy disk {:?}: {:#}", - status.identity, - // `disk_errs` was partitioned by `status.err.is_some()`, so - // this is safe to unwrap. 
- status.err.unwrap(), - ) - }))); - } - - let (dataset_errs, dataset_successes): (Vec<_>, Vec<_>) = - result.datasets.into_iter().partition(|status| status.err.is_some()); - - if !dataset_errs.is_empty() { - warn!( - log, - "Failed to deploy datasets for sled agent"; - "successfully configured datasets" => dataset_successes.len(), - "failed dataset configurations" => dataset_errs.len(), - ); - for err in &dataset_errs { - warn!(log, "{err:?}"); - } - return Err(merge_anyhow_list(dataset_errs.into_iter().map( - |status| { - anyhow!( - "failed to deploy dataset {}: {:#}", - status.dataset_name.full_name(), - // `dataset_errs` was partitioned by `status.err.is_some()`, - // so this is safe to unwrap. - status.err.unwrap(), - ) - }, - ))); - } - - info!( - log, - "Successfully deployed config to sled agent"; - "successfully configured disks" => disk_successes.len(), - "successfully configured datasets" => dataset_successes.len(), - ); - Ok(()) -} - #[cfg(test)] mod tests { use super::*; @@ -327,6 +263,9 @@ mod tests { // Observe the latest configuration stored on the simulated sled agent, // and verify that this output matches the input. + // + // TODO-cleanup Simulated sled-agent should report a unified + // `OmicronSledConfig`. let observed_disks = sim_sled_agent.omicron_physical_disks_list().unwrap(); let observed_datasets = sim_sled_agent.datasets_config_list().unwrap(); diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 4cb65c02546..01330fb79fc 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -210,20 +210,16 @@ impl<'a> Planner<'a> { &mut self, sled_id: SledUuid, ) -> Result<(), Error> { - // The sled is not expunged. We have to see if the inventory - // reflects the parent blueprint disk generation. If it does - // then we mark any expunged disks decommissioned. + // The sled is not expunged. We have to see if the inventory reflects a + // reconciled config generation. If it does, we'll check below whether + // the reconciled generation is sufficiently advanced to decommission + // any disks. let Some(seen_generation) = self .inventory .sled_agents .get(&sled_id) - // TODO-correctness For now this is correct, because sled-agent - // doesn't ledger a config until it's applied it. However, once - // https://github.com/oxidecomputer/omicron/pull/8064 lands, - // sled-agent will ledger a config and then later reconcile it; do - // we need to wait for that reconciliation to decommission disks? - .and_then(|sa| sa.ledgered_sled_config.as_ref()) - .map(|config| config.generation) + .and_then(|sa| sa.last_reconciliation.as_ref()) + .map(|reconciled| reconciled.last_reconciled_config.generation) else { // There is no current inventory for the sled agent, so we cannot // decommission any disks. @@ -418,28 +414,37 @@ impl<'a> Planner<'a> { .blueprint .current_sled_zones(sled_id, BlueprintZoneDisposition::is_expunged) { - match zone.disposition { + // If this is a zone still waiting for cleanup, grab the generation + // in which it was expunged. Otherwise, move on. + let as_of_generation = match zone.disposition { BlueprintZoneDisposition::Expunged { as_of_generation, ready_for_cleanup, - } if !ready_for_cleanup => { - // TODO-correctness For now this is correct, because - // sled-agent doesn't ledger a config until it's applied it. 
- // However, as a part of landing - // https://github.com/oxidecomputer/omicron/pull/8064, - // this needs to change to check the last reconciled config - // instead of just the ledgered config. - if let Some(config) = &sled_inv.ledgered_sled_config { - if config.generation >= as_of_generation - && !config.zones.contains_key(&zone.id) - { - zones_ready_for_cleanup.push(zone.id); - } - } - } + } if !ready_for_cleanup => as_of_generation, BlueprintZoneDisposition::InService | BlueprintZoneDisposition::Expunged { .. } => continue, + }; + + // If the sled hasn't done any reconciliation, wait until it has. + let Some(reconciliation) = &sled_inv.last_reconciliation else { + continue; + }; + + // If the sled hasn't reconciled a new-enough generation, wait until + // it has. + if reconciliation.last_reconciled_config.generation + < as_of_generation + { + continue; } + + // If the sled hasn't shut down the zone, wait until it has. + if reconciliation.zones.contains_key(&zone.id) { + continue; + } + + // All checks passed: we can mark this zone as ready for cleanup. + zones_ready_for_cleanup.push(zone.id); } if !zones_ready_for_cleanup.is_empty() { @@ -568,22 +573,23 @@ impl<'a> Planner<'a> { // NTP zone), we'll need to be careful how we do it to avoid a // problem here. // - // TODO-cleanup Once - // https://github.com/oxidecomputer/omicron/pull/8064 lands, the - // above comment will be overly conservative; sled-agent won't - // reject configs just because time isn't sync'd yet. We may be able - // to remove this check entirely. (It's probably also fine to keep - // it for now; removing it just saves us an extra planning iteration - // when adding a new sled.) + // TODO-cleanup The above comment is now overly conservative; + // sled-agent won't reject configs just because time isn't sync'd + // yet. We may be able to remove this check entirely, but we'd need + // to do some testing to confirm no surprises. (It's probably also + // fine to keep it for now; removing it just saves us an extra + // planning iteration when adding a new sled.) let has_ntp_inventory = self .inventory .sled_agents .get(&sled_id) .map(|sled_agent| { - sled_agent.ledgered_sled_config.as_ref().map_or( + sled_agent.last_reconciliation.as_ref().map_or( false, - |config| { - config.zones.iter().any(|z| z.zone_type.is_ntp()) + |reconciliation| { + reconciliation + .running_omicron_zones() + .any(|z| z.zone_type.is_ntp()) }, ) }) @@ -907,7 +913,7 @@ impl<'a> Planner<'a> { // Wait for zones to appear up-to-date in the inventory. let inventory_zones = self .inventory - .all_ledgered_omicron_zones() + .all_running_omicron_zones() .map(|z| (z.id, z.image_source.clone())) .collect::>(); for &sled_id in &sleds { @@ -1134,6 +1140,8 @@ pub(crate) mod test { use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use expectorate::assert_contents; + use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; + use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_types::deployment::BlueprintDatasetDisposition; use nexus_types::deployment::BlueprintDiffSummary; use nexus_types::deployment::BlueprintPhysicalDiskDisposition; @@ -2268,9 +2276,10 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .ledgered_sled_config + .last_reconciliation .as_mut() .unwrap() + .last_reconciled_config .generation = Generation::from_u32(3); let blueprint3 = Planner::new_based_on( @@ -4140,6 +4149,14 @@ pub(crate) mod test { // Use our example system. 
let (mut collection, input, blueprint1) = example(&log, TEST_NAME); + // Don't start more internal DNS zones (which the planner would, as a + // side effect of our test details). + let input = { + let mut builder = input.into_builder(); + builder.policy_mut().target_internal_dns_zone_count = 0; + builder.build() + }; + // Find a Nexus zone we'll use for our test. let (sled_id, nexus_config) = blueprint1 .sleds @@ -4172,7 +4189,7 @@ pub(crate) mod test { // Run the planner. It should expunge all zones on the disk we just // expunged, including our Nexus zone, but not mark them as ready for // cleanup yet. - let blueprint2 = Planner::new_based_on( + let mut blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, &input, @@ -4184,6 +4201,27 @@ pub(crate) mod test { .plan() .expect("planned"); + // Mark the disk we expected as "ready for cleanup"; this isn't what + // we're testing, and failing to do this will interfere with some of the + // checks we do below. + for mut disk in + blueprint2.sleds.get_mut(&sled_id).unwrap().disks.iter_mut() + { + match disk.disposition { + BlueprintPhysicalDiskDisposition::InService => (), + BlueprintPhysicalDiskDisposition::Expunged { + as_of_generation, + .. + } => { + disk.disposition = + BlueprintPhysicalDiskDisposition::Expunged { + as_of_generation, + ready_for_cleanup: true, + }; + } + } + } + // Helper to extract the Nexus zone's disposition in a blueprint. let get_nexus_disposition = |bp: &Blueprint| { bp.sleds.get(&sled_id).unwrap().zones.iter().find_map(|z| { @@ -4192,8 +4230,8 @@ pub(crate) mod test { }; // This sled's config generation should have been bumped... - let bp2_generation = - blueprint2.sleds.get(&sled_id).unwrap().sled_agent_generation; + let bp2_config = blueprint2.sleds.get(&sled_id).unwrap().clone(); + let bp2_sled_config = bp2_config.clone().into_in_service_sled_config(); assert_eq!( blueprint1 .sleds @@ -4201,24 +4239,25 @@ pub(crate) mod test { .unwrap() .sled_agent_generation .next(), - bp2_generation + bp2_sled_config.generation ); // ... and the Nexus should should have the disposition we expect. assert_eq!( get_nexus_disposition(&blueprint2), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_sled_config.generation, ready_for_cleanup: false, }) ); // Running the planner again should make no changes until the inventory - // reports that the zone is not running and that the sled has seen a - // new-enough generation. Try three variants that should do nothing: + // reports that the zone is not running and that the sled has reconciled + // a new-enough generation. Try these variants: // - // * same inventory as above - // * inventory reports a new generation (but zone still running) - // * inventory reports zone not running (but still the old generation) + // * same inventory as above (expect no changes) + // * new config is ledgered but not reconciled (expect no changes) + // * new config is reconciled, but zone is in an error state (expect + // no changes) eprintln!("planning with no inventory change..."); assert_planning_makes_no_changes( &logctx.log, @@ -4227,15 +4266,7 @@ pub(crate) mod test { &collection, TEST_NAME, ); - // TODO-cleanup These checks depend on `last_reconciled_config`, which - // is not yet populated; uncomment these and check them by mutating - // `last_reconciled_config` once - // https://github.com/oxidecomputer/omicron/pull/8064 lands. 
We could - // just mutate `ledgered_sled_config` in the meantime (as this - // commented-out code does below), but that's not really checking what - // we care about. - /* - eprintln!("planning with generation bump but zone still running..."); + eprintln!("planning with config ledgered but not reconciled..."); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4246,15 +4277,15 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .ledgered_sled_config - .as_mut() - .unwrap() - .generation = bp2_generation; + .ledgered_sled_config = Some(bp2_sled_config.clone()); collection }, TEST_NAME, ); - eprintln!("planning with zone gone but generation not bumped..."); + eprintln!( + "planning with config ledgered but \ + zones failed to shut down..." + ); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4265,28 +4296,42 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .ledgered_sled_config - .as_mut() + .ledgered_sled_config = Some(bp2_sled_config.clone()); + let mut reconciliation = + ConfigReconcilerInventory::debug_assume_success( + bp2_sled_config.clone(), + ); + // For all the zones that are in bp2_config but not + // bp2_sled_config (i.e., zones that should have been shut + // down), insert an error result in the reconciliation. + for zone_id in bp2_config.zones.keys() { + if !reconciliation.zones.contains_key(zone_id) { + reconciliation.zones.insert( + *zone_id, + ConfigReconcilerInventoryResult::Err { + message: "failed to shut down".to_string(), + }, + ); + } + } + collection + .sled_agents + .get_mut(&sled_id) .unwrap() - .zones - .retain(|z| z.id != nexus_config.id); + .last_reconciliation = Some(reconciliation); collection }, TEST_NAME, ); - */ // Now make both changes to the inventory. { - let config = &mut collection - .sled_agents - .get_mut(&sled_id) - .unwrap() - .ledgered_sled_config - .as_mut() - .unwrap(); - config.generation = bp2_generation; - config.zones.retain(|z| z.id != nexus_config.id); + let config = collection.sled_agents.get_mut(&sled_id).unwrap(); + config.ledgered_sled_config = Some(bp2_sled_config.clone()); + config.last_reconciliation = + Some(ConfigReconcilerInventory::debug_assume_success( + bp2_sled_config.clone(), + )); } // Run the planner. It mark our Nexus zone as ready for cleanup now that @@ -4306,7 +4351,7 @@ pub(crate) mod test { assert_eq!( get_nexus_disposition(&blueprint3), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_sled_config.generation, ready_for_cleanup: true, }) ); @@ -4315,7 +4360,7 @@ pub(crate) mod test { // since it doesn't affect what's sent to sled-agent. assert_eq!( blueprint3.sleds.get(&sled_id).unwrap().sled_agent_generation, - bp2_generation + bp2_sled_config.generation ); assert_planning_makes_no_changes( @@ -4395,8 +4440,12 @@ pub(crate) mod test { }; // This sled's config generation should have been bumped... - let bp2_generation = - blueprint2.sleds.get(&sled_id).unwrap().sled_agent_generation; + let bp2_config = blueprint2 + .sleds + .get(&sled_id) + .unwrap() + .clone() + .into_in_service_sled_config(); assert_eq!( blueprint1 .sleds @@ -4404,13 +4453,13 @@ pub(crate) mod test { .unwrap() .sled_agent_generation .next(), - bp2_generation + bp2_config.generation ); // ... and the DNS zone should should have the disposition we expect. 
assert_eq!( get_dns_disposition(&blueprint2), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_config.generation, ready_for_cleanup: false, }) ); @@ -4428,15 +4477,12 @@ pub(crate) mod test { // Make the inventory changes necessary for cleanup to proceed. { - let config = &mut collection - .sled_agents - .get_mut(&sled_id) - .unwrap() - .ledgered_sled_config - .as_mut() - .unwrap(); - config.generation = bp2_generation; - config.zones.retain(|z| z.id != internal_dns_config.id); + let config = &mut collection.sled_agents.get_mut(&sled_id).unwrap(); + config.ledgered_sled_config = Some(bp2_config.clone()); + config.last_reconciliation = + Some(ConfigReconcilerInventory::debug_assume_success( + bp2_config.clone(), + )); } // Run the planner. It should mark our internal DNS zone as ready for @@ -4458,7 +4504,7 @@ pub(crate) mod test { assert_eq!( get_dns_disposition(&blueprint3), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_config.generation, ready_for_cleanup: true, }) ); diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 9a43c6c2564..fd277b77582 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -6,6 +6,7 @@ //! associated inventory collections and blueprints use anyhow::{Context, anyhow, bail, ensure}; +use chrono::Utc; use gateway_client::types::RotState; use gateway_client::types::SpState; use indexmap::IndexMap; @@ -13,6 +14,7 @@ use ipnet::Ipv6Net; use ipnet::Ipv6Subnets; use nexus_inventory::CollectionBuilder; use nexus_sled_agent_shared::inventory::Baseboard; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; @@ -58,6 +60,7 @@ use std::fmt::Debug; use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::sync::Arc; +use std::time::Duration; /// Describes an actual or synthetic Oxide rack for planning and testing /// @@ -394,10 +397,17 @@ impl SystemDescription { })?; let sled = Arc::make_mut(sled); - sled.inventory_sled_agent.ledgered_sled_config = Some(sled_config); + sled.inventory_sled_agent.ledgered_sled_config = + Some(sled_config.clone()); + + // Present results as though the reconciler has successfully completed. 
sled.inventory_sled_agent.reconciler_status = - ConfigReconcilerInventoryStatus::NotYetRun; - sled.inventory_sled_agent.last_reconciliation = None; + ConfigReconcilerInventoryStatus::Idle { + completed_at: Utc::now(), + ran_for: Duration::from_secs(5), + }; + sled.inventory_sled_agent.last_reconciliation = + Some(ConfigReconcilerInventory::debug_assume_success(sled_config)); Ok(self) } @@ -723,9 +733,16 @@ impl Sled { }) .collect(), datasets: vec![], - ledgered_sled_config: Some(sled_config), - reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, - last_reconciliation: None, + ledgered_sled_config: Some(sled_config.clone()), + reconciler_status: ConfigReconcilerInventoryStatus::Idle { + completed_at: Utc::now(), + ran_for: Duration::from_secs(5), + }, + last_reconciliation: Some( + ConfigReconcilerInventory::debug_assume_success( + sled_config, + ), + ), } }; diff --git a/nexus/src/app/background/tasks/sync_service_zone_nat.rs b/nexus/src/app/background/tasks/sync_service_zone_nat.rs index a1c1869fed9..34798bb9335 100644 --- a/nexus/src/app/background/tasks/sync_service_zone_nat.rs +++ b/nexus/src/app/background/tasks/sync_service_zone_nat.rs @@ -126,21 +126,17 @@ impl BackgroundTask for ServiceZoneNatTracker { let sled_address = oxnet::Ipv6Net::host_net(*sled.ip); // TODO-correctness Looking at inventory here is a little - // sketchy. We currently check the most-recently-ledgered zones - // which tells us what services sled-agent things it's supposed - // to be running. It might be better to check either: - // - // * `sa.last_reconciliation` (to know what zones are actually - // running; this requires - // https://github.com/oxidecomputer/omicron/pull/8064 landing) - // if the goal is to sync what's actually on the sled - // * a rendezvous table populated by reconfigurator if the goal - // is to sync with what's Nexus thinks is supposed to be - // running on the sled + // sketchy. We check the last reconciliation result, which + // should be a view of what zones are actually running on the + // sled. But maybe it would be better to act on a rendezvous + // table populated by reconfigurator if the goal is to sync with + // what's Nexus thinks is supposed to be running on the sled? 
let zones = sa - .ledgered_sled_config - .map(|config| config.zones) - .unwrap_or_default(); + .last_reconciliation + .iter() + .flat_map(|reconciliation| { + reconciliation.running_omicron_zones().cloned() + }); for zone in zones { let zone_type: OmicronZoneType = zone.zone_type; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 79c79a9a1e3..f8699b216cb 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -178,6 +178,17 @@ impl Collection { .flat_map(|config| config.zones.iter()) } + /// Iterate over all the successfully-started Omicron zones (as reported by + /// each sled-agent's last reconciliation attempt) + pub fn all_running_omicron_zones( + &self, + ) -> impl Iterator { + self.sled_agents + .values() + .filter_map(|sa| sa.last_reconciliation.as_ref()) + .flat_map(|reconciliation| reconciliation.running_omicron_zones()) + } + /// Iterate over the sled ids of sleds identified as Scrimlets pub fn scrimlets(&self) -> impl Iterator + '_ { self.sled_agents diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index c04084e07f8..b5d286d264f 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -334,30 +334,6 @@ } } }, - "/datasets": { - "get": { - "summary": "Lists the datasets that this sled is configured to use", - "operationId": "datasets_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DatasetsConfig" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", @@ -516,38 +492,8 @@ "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronSledConfigResult" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, - "/omicron-physical-disks": { - "get": { - "operationId": "omicron_physical_disks_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -2229,33 +2175,6 @@ } } } - }, - "/zpools": { - "get": { - "operationId": "zpools_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "title": "Array_of_Zpool", - "type": "array", - "items": { - "$ref": "#/components/schemas/Zpool" - } - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } } }, "components": { @@ -3863,22 +3782,6 @@ "description": "The kind of dataset. 
See the `DatasetKind` enum in omicron-common for possible values.", "type": "string" }, - "DatasetManagementStatus": { - "description": "Identifies how a single dataset management operation may have succeeded or failed.", - "type": "object", - "properties": { - "dataset_name": { - "$ref": "#/components/schemas/DatasetName" - }, - "err": { - "nullable": true, - "type": "string" - } - }, - "required": [ - "dataset_name" - ] - }, "DatasetName": { "type": "object", "properties": { @@ -3894,29 +3797,6 @@ "pool_name" ] }, - "DatasetsConfig": { - "type": "object", - "properties": { - "datasets": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/DatasetConfig" - } - }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - } - }, - "required": [ - "datasets", - "generation" - ] - }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -3993,128 +3873,6 @@ "vendor" ] }, - "DiskManagementError": { - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "not_found" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "internal_disk_control_plane_request" - ] - }, - "value": { - "$ref": "#/components/schemas/TypedUuidForPhysicalDiskKind" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "zpool_uuid_mismatch" - ] - }, - "value": { - "type": "object", - "properties": { - "expected": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - }, - "observed": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - } - }, - "required": [ - "expected", - "observed" - ] - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "key_manager" - ] - }, - "value": { - "type": "string" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "other" - ] - }, - "value": { - "type": "string" - } - }, - "required": [ - "type", - "value" - ] - } - ] - }, - "DiskManagementStatus": { - "description": "Identifies how a single disk management operation may have succeeded or failed.", - "type": "object", - "properties": { - "err": { - "nullable": true, - "allOf": [ - { - "$ref": "#/components/schemas/DiskManagementError" - } - ] - }, - "identity": { - "$ref": "#/components/schemas/DiskIdentity" - } - }, - "required": [ - "identity" - ] - }, "DiskRuntimeState": { "description": "Runtime state of the Disk, which includes its attach state and some minimal metadata", "type": "object", @@ -4413,13 +4171,6 @@ } ] }, - "DiskType": { - "type": "string", - "enum": [ - "U2", - "M2" - ] - }, 
"DiskVariant": { "type": "string", "enum": [ @@ -5677,29 +5428,6 @@ "pool_id" ] }, - "OmicronPhysicalDisksConfig": { - "type": "object", - "properties": { - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" - } - }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - } - }, - "required": [ - "disks", - "generation" - ] - }, "OmicronSledConfig": { "description": "Describes the set of Reconfigurator-managed configuration elements of a sled", "type": "object", @@ -5732,28 +5460,6 @@ "zones" ] }, - "OmicronSledConfigResult": { - "description": "Result of the currently-synchronous `omicron_config_put` endpoint.", - "type": "object", - "properties": { - "datasets": { - "type": "array", - "items": { - "$ref": "#/components/schemas/DatasetManagementStatus" - } - }, - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/DiskManagementStatus" - } - } - }, - "required": [ - "datasets", - "disks" - ] - }, "OmicronZoneConfig": { "description": "Describes one Omicron-managed zone running on a sled", "type": "object", @@ -7810,21 +7516,6 @@ "version" ] }, - "Zpool": { - "type": "object", - "properties": { - "disk_type": { - "$ref": "#/components/schemas/DiskType" - }, - "id": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - } - }, - "required": [ - "disk_type", - "id" - ] - }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 73e2e4de6e0..e9400245755 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "OmicronZonesConfigLocal", - "description": "Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus wants for all of its zones) with the locally-determined configuration for these zones.", + "description": "Legacy type of the ledgered zone config.", "type": "object", "required": [ "ledger_generation", @@ -10,20 +10,10 @@ ], "properties": { "ledger_generation": { - "description": "ledger-managed generation number\n\nThis generation is managed by the ledger facility itself. It's bumped whenever we write a new ledger. In practice, we don't currently have any reason to bump this _for a given Omicron generation_ so it's somewhat redundant. In principle, if we needed to modify the ledgered configuration due to some event that doesn't change the Omicron config (e.g., if we wanted to move the root filesystem to a different path), we could do that by bumping this generation.", - "allOf": [ - { - "$ref": "#/definitions/Generation" - } - ] + "$ref": "#/definitions/Generation" }, "omicron_generation": { - "description": "generation of the Omicron-provided part of the configuration\n\nThis generation number is outside of Sled Agent's control. 
We store exactly what we were given and use this number to decide when to fail requests to establish an outdated configuration.\n\nYou can think of this as a major version number, with `ledger_generation` being a minor version number. See `is_newer_than()`.", - "allOf": [ - { - "$ref": "#/definitions/Generation" - } - ] + "$ref": "#/definitions/Generation" }, "zones": { "type": "array", @@ -269,7 +259,6 @@ } }, "OmicronZoneConfigLocal": { - "description": "Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus wants for this zone) with any locally-determined configuration (like the path to the root filesystem)", "type": "object", "required": [ "root", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 1b3fb78698b..acacb4cc1d6 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -138,6 +138,7 @@ tokio-stream.workspace = true tokio-util.workspace = true illumos-utils = { workspace = true, features = ["testing"] } +sled-agent-config-reconciler = { workspace = true, features = ["testing"] } sled-storage = { workspace = true, features = ["testing"] } # diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index da9c0c77c4b..316d2d68723 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -13,7 +13,7 @@ use dropshot::{ StreamingBody, TypedBody, }; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, OmicronSledConfigResult, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::{ api::external::Generation, @@ -24,7 +24,7 @@ use omicron_common::{ SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }, }, - disk::{DatasetsConfig, DiskVariant, OmicronPhysicalDisksConfig}, + disk::DiskVariant, ledger::Ledgerable, }; use omicron_uuid_kinds::{ @@ -266,32 +266,7 @@ pub trait SledAgentApi { async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError>; - - /// Lists the datasets that this sled is configured to use - #[endpoint { - method = GET, - path = "/datasets", - }] - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - - #[endpoint { - method = GET, - path = "/omicron-physical-disks", - }] - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - - #[endpoint { - method = GET, - path = "/zpools", - }] - async fn zpools_get( - rqctx: RequestContext, - ) -> Result>, HttpError>; + ) -> Result; #[endpoint { method = GET, diff --git a/sled-agent/config-reconciler/Cargo.toml b/sled-agent/config-reconciler/Cargo.toml index 9d173f35861..f5f8a61e4e1 100644 --- a/sled-agent/config-reconciler/Cargo.toml +++ b/sled-agent/config-reconciler/Cargo.toml @@ -45,6 +45,7 @@ expectorate.workspace = true illumos-utils = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true proptest.workspace = true +schemars.workspace = true scopeguard.workspace = true serde_json.workspace = true sled-storage = { workspace = true, features = ["testing"] } diff --git a/sled-agent/config-reconciler/src/dataset_serialization_task.rs b/sled-agent/config-reconciler/src/dataset_serialization_task.rs index 9cc35285545..05556c3abf5 100644 --- a/sled-agent/config-reconciler/src/dataset_serialization_task.rs +++ b/sled-agent/config-reconciler/src/dataset_serialization_task.rs @@ -25,9 +25,6 @@ use illumos_utils::zfs::DestroyDatasetError; use illumos_utils::zfs::Mountpoint; use illumos_utils::zfs::WhichDatasets; use illumos_utils::zfs::Zfs; -use illumos_utils::zpool::PathInPool; -use 
illumos_utils::zpool::ZpoolOrRamdisk; -use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::InventoryDataset; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetKind; @@ -37,7 +34,6 @@ use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::DatasetUuid; use sled_storage::config::MountConfig; use sled_storage::dataset::CRYPT_DATASET; -use sled_storage::dataset::U2_DEBUG_DATASET; use sled_storage::dataset::ZONE_DATASET; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; @@ -91,7 +87,7 @@ pub enum DatasetEnsureError { } impl DatasetEnsureError { - fn is_retryable(&self) -> bool { + pub(crate) fn is_retryable(&self) -> bool { match self { // These errors might be retryable; there are probably cases where // they won't be, but we need more context than we have available @@ -161,89 +157,13 @@ pub enum NestedDatasetListError { }, } -#[derive(Debug, Clone, Default)] -pub(crate) struct DatasetEnsureResult(IdMap); - -impl DatasetEnsureResult { - pub(crate) fn has_retryable_error(&self) -> bool { - self.0.iter().any(|result| match &result.state { - DatasetState::Ensured => false, - DatasetState::FailedToEnsure(err) => err.is_retryable(), - }) - } - - pub(crate) fn to_inventory( - &self, - ) -> BTreeMap { - self.0 - .iter() - .map(|dataset| match &dataset.state { - DatasetState::Ensured => { - (dataset.config.id, ConfigReconcilerInventoryResult::Ok) - } - DatasetState::FailedToEnsure(err) => ( - dataset.config.id, - ConfigReconcilerInventoryResult::Err { - message: InlineErrorChain::new(err).to_string(), - }, - ), - }) - .collect() - } - - pub(crate) fn all_mounted_debug_datasets<'a>( - &'a self, - mount_config: &'a MountConfig, - ) -> impl Iterator + 'a { - self.all_mounted_datasets(mount_config, DatasetKind::Debug) - } - - pub(crate) fn all_mounted_zone_root_datasets<'a>( - &'a self, - mount_config: &'a MountConfig, - ) -> impl Iterator + 'a { - self.all_mounted_datasets(mount_config, DatasetKind::TransientZoneRoot) - } - - fn all_mounted_datasets<'a>( - &'a self, - mount_config: &'a MountConfig, - kind: DatasetKind, - ) -> impl Iterator + 'a { - // We're a helper called by the pub methods on this type, so we only - // have to handle the `kind`s they call us with. 
- let mountpoint = match &kind { - DatasetKind::Debug => U2_DEBUG_DATASET, - DatasetKind::TransientZoneRoot => ZONE_DATASET, - _ => unreachable!( - "private function called with unexpected kind {kind:?}" - ), - }; - self.0 - .iter() - .filter(|result| match &result.state { - DatasetState::Ensured => true, - DatasetState::FailedToEnsure(_) => false, - }) - .filter(move |result| *result.config.name.kind() == kind) - .map(|result| { - let pool = *result.config.name.pool(); - PathInPool { - pool: ZpoolOrRamdisk::Zpool(pool), - path: pool - .dataset_mountpoint(&mount_config.root, mountpoint), - } - }) - } -} - -#[derive(Debug, Clone)] -struct SingleDatasetEnsureResult { - config: DatasetConfig, - state: DatasetState, +#[derive(Debug)] +pub(crate) struct DatasetEnsureResult { + pub(crate) config: DatasetConfig, + pub(crate) result: Result<(), Arc>, } -impl IdMappable for SingleDatasetEnsureResult { +impl IdMappable for DatasetEnsureResult { type Id = DatasetUuid; fn id(&self) -> Self::Id { @@ -251,16 +171,18 @@ impl IdMappable for SingleDatasetEnsureResult { } } -#[derive(Debug, Clone)] -enum DatasetState { - Ensured, - FailedToEnsure(Arc), -} - #[derive(Debug, Clone)] pub(crate) struct DatasetTaskHandle(mpsc::Sender); impl DatasetTaskHandle { + // For testing, create a handle on which requests will always fail with a + // `DatasetTaskError`. + #[cfg(any(test, feature = "testing"))] + pub(crate) fn spawn_noop() -> Self { + let (tx, _rx) = mpsc::channel(1); + Self(tx) + } + pub fn spawn_dataset_task( mount_config: Arc, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, @@ -296,7 +218,7 @@ impl DatasetTaskHandle { mount_config, currently_managed_zpools_rx, request_rx, - datasets: DatasetEnsureResult::default(), + ensured_datasets: BTreeSet::new(), log: base_log.new(slog::o!("component" => "DatasetTask")), } .run(zfs), @@ -317,7 +239,7 @@ impl DatasetTaskHandle { pub async fn datasets_ensure( &self, datasets: IdMap, - ) -> Result { + ) -> Result, DatasetTaskError> { self.try_send_request(|tx| DatasetTaskRequest::DatasetsEnsure { datasets, tx, @@ -403,7 +325,7 @@ struct DatasetTask { mount_config: Arc, request_rx: mpsc::Receiver, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, - datasets: DatasetEnsureResult, + ensured_datasets: BTreeSet, log: Logger, } @@ -426,8 +348,7 @@ impl DatasetTask { _ = tx.0.send(self.inventory(zpools, zfs).await); } DatasetTaskRequest::DatasetsEnsure { datasets, tx } => { - self.datasets_ensure(datasets, zfs).await; - _ = tx.0.send(self.datasets.clone()); + _ = tx.0.send(self.datasets_ensure(datasets, zfs).await); } DatasetTaskRequest::NestedDatasetMount { name, tx } => { _ = tx.0.send(self.nested_dataset_mount(name, zfs).await); @@ -482,7 +403,9 @@ impl DatasetTask { &mut self, config: IdMap, zfs: &T, - ) { + ) -> IdMap { + let mut ensure_results = IdMap::new(); + // There's an implicit hierarchy inside the list of `DatasetConfig`s: // // 1. 
Each zpool may contain many datasets @@ -535,9 +458,9 @@ impl DatasetTask { "dataset" => ?dataset, ); let err = DatasetEnsureError::ZpoolNotFound(*zpool); - self.datasets.0.insert(SingleDatasetEnsureResult { + ensure_results.insert(DatasetEnsureResult { config: dataset, - state: DatasetState::FailedToEnsure(Arc::new(err)), + result: Err(Arc::new(err)), }); continue; } @@ -613,7 +536,7 @@ impl DatasetTask { const DATASET_ENSURE_CONCURRENCY_LIMIT: usize = 16; let mut non_transient_zones = futures::stream::iter( non_transient_zone_configs.into_iter().map(|dataset| async move { - let state = match Self::ensure_one_dataset( + let result = Self::ensure_one_dataset( DatasetCreationDetails::Config( &dataset, old_datasets.get(&dataset.name.full_name()), @@ -622,18 +545,14 @@ impl DatasetTask { &log, zfs, ) - .await - { - Ok(state) => state, - Err(err) => DatasetState::FailedToEnsure(Arc::new(err)), - }; - (dataset, state) + .await; + (dataset, result.map_err(Arc::new)) }), ) .buffer_unordered(DATASET_ENSURE_CONCURRENCY_LIMIT); - while let Some((config, state)) = non_transient_zones.next().await { - self.datasets.0.insert(SingleDatasetEnsureResult { config, state }); + while let Some((config, result)) = non_transient_zones.next().await { + ensure_results.insert(DatasetEnsureResult { config, result }); } // For each transient zone dataset: either ensure it or mark down why we @@ -649,30 +568,28 @@ impl DatasetTask { else { let err = DatasetEnsureError::TransientZoneRootNoConfig(zpool); - self.datasets.0.insert(SingleDatasetEnsureResult { + ensure_results.insert(DatasetEnsureResult { config: dataset, - state: DatasetState::FailedToEnsure(Arc::new(err)), + result: Err(Arc::new(err)), }); continue; }; // Have we successfully ensured that parent dataset? - match self - .datasets - .0 + match ensure_results .get(zpool_transient_zone_root_dataset_id) - .map(|d| &d.state) + .map(|d| &d.result) { - Some(DatasetState::Ensured) => (), - Some(DatasetState::FailedToEnsure(err)) => { + Some(Ok(())) => (), + Some(Err(err)) => { let err = DatasetEnsureError::TransientZoneRootFailure { zpool, err: Arc::clone(err), }; - self.datasets.0.insert(SingleDatasetEnsureResult { + ensure_results.insert(DatasetEnsureResult { config: dataset, - state: DatasetState::FailedToEnsure(Arc::new(err)), + result: Err(Arc::new(err)), }); continue; } @@ -685,7 +602,7 @@ impl DatasetTask { } transient_zone_futures.push(async move { - let state = match Self::ensure_one_dataset( + let result = Self::ensure_one_dataset( DatasetCreationDetails::Config( &dataset, old_datasets.get(&dataset.name.full_name()), @@ -694,32 +611,44 @@ impl DatasetTask { &log, zfs, ) - .await - { - Ok(state) => state, - Err(err) => DatasetState::FailedToEnsure(Arc::new(err)), - }; - (dataset, state) + .await; + (dataset, result.map_err(Arc::new)) }); } } let mut transient_zones = futures::stream::iter(transient_zone_futures) .buffer_unordered(DATASET_ENSURE_CONCURRENCY_LIMIT); - while let Some((config, state)) = transient_zones.next().await { - self.datasets.0.insert(SingleDatasetEnsureResult { config, state }); + while let Some((config, result)) = transient_zones.next().await { + ensure_results.insert(DatasetEnsureResult { config, result }); } + + // Remember all successfully-ensured datasets (used by + // `nested_dataset_ensure()` to check that any nested datasets' parents + // have been ensured). 
+ self.ensured_datasets = ensure_results + .iter() + .filter_map(|d| { + if d.result.is_ok() { + Some(d.config.name.clone()) + } else { + None + } + }) + .collect(); + + ensure_results } /// Compare `dataset`'s properties against `old_dataset` (an set of /// recently-retrieved properties from ZFS). If we already know /// the state of `dataset` based on those properties, return `Some(state)`; /// otherwise, return `None`. - fn is_dataset_state_known( + fn is_dataset_ensure_result_known( dataset: &DatasetConfig, old_dataset: Option<&DatasetProperties>, log: &Logger, - ) -> Option { + ) -> Option> { let log = log.new(slog::o!("dataset" => dataset.name.full_name())); let Some(old_dataset) = old_dataset else { @@ -736,13 +665,11 @@ impl DatasetTask { // We cannot do anything here: we already have a dataset with this // name, but it has a different ID. Nexus has sent us bad // information (or we have a bug somewhere); refuse to proceed. - return Some(DatasetState::FailedToEnsure(Arc::new( - DatasetEnsureError::UuidMismatch { - name: dataset.name.full_name(), - expected: dataset.id, - got: old_id, - }, - ))); + return Some(Err(DatasetEnsureError::UuidMismatch { + name: dataset.name.full_name(), + expected: dataset.id, + got: old_id, + })); } let old_props = match SharedDatasetConfig::try_from(old_dataset) { @@ -778,7 +705,7 @@ impl DatasetTask { } info!(log, "No changes necessary, returning early"); - return Some(DatasetState::Ensured); + return Some(Ok(())); } // Ensures a dataset exists within a zpool. @@ -792,7 +719,7 @@ impl DatasetTask { mount_config: &MountConfig, log: &Logger, zfs: &T, - ) -> Result { + ) -> Result<(), DatasetEnsureError> { info!(log, "ensure_dataset"; "details" => ?details); // Unpack the particulars of the kind of dataset we're creating. @@ -810,10 +737,10 @@ impl DatasetTask { DatasetCreationDetails::Config(config, old_props) => { // Do we alread know the state of this dataset based on // `old_props`? - if let Some(state) = - Self::is_dataset_state_known(config, old_props, log) - { - return Ok(state); + if let Some(result) = Self::is_dataset_ensure_result_known( + config, old_props, log, + ) { + return result; } let dataset_id = Some(config.id); @@ -848,8 +775,7 @@ impl DatasetTask { id: dataset_id, additional_options: None, }) - .await?; - Ok(DatasetState::Ensured) + .await } async fn nested_dataset_mount( @@ -876,13 +802,7 @@ impl DatasetTask { let log = self.log.new(slog::o!("request" => "nested_dataset_ensure")); // Has our parent dataset been mounted? - // - // TODO-cleanup Could we get the parent dataset ID instead of its name? - // Then we could do a lookup instead of a scan. - if !self.datasets.0.iter().any(|result| { - result.config.name == config.name.root - && matches!(result.state, DatasetState::Ensured) - }) { + if !self.ensured_datasets.contains(&config.name.root) { return Err(NestedDatasetEnsureError::ParentDatasetNotMounted( config.name.root, )); @@ -1014,7 +934,7 @@ enum DatasetTaskRequest { }, DatasetsEnsure { datasets: IdMap, - tx: DebugIgnore>, + tx: DebugIgnore>>, }, NestedDatasetMount { name: NestedDatasetLocation, @@ -1396,21 +1316,20 @@ mod tests { // The returned map should record success for all datasets on managed // zpools and errors on all unmanaged pools. 
- assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); let mut num_datasets_on_managed_pools = 0; for dataset in &datasets { - let single_result = result - .0 + let single_dataset = result .get(&dataset.id) .expect("result contains entry for each dataset"); if managed_pools.contains(dataset.name.pool()) { - assert_matches!(single_result.state, DatasetState::Ensured); + assert_matches!(single_dataset.result, Ok(())); num_datasets_on_managed_pools += 1; } else { assert_matches!( - &single_result.state, - DatasetState::FailedToEnsure(err) + &single_dataset.result, + Err(err) if matches!(**err, DatasetEnsureError::ZpoolNotFound(_)) ); } @@ -1533,35 +1452,32 @@ mod tests { // the `Succeed` behavior // * errors for all other transient zones (with the specific error // depending on whether the parent failed or was omitted) - assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); for dataset in &datasets { let behavior = pools .get(dataset.name.pool()) .expect("datasets only exist for pools we have"); - let result = result - .0 + let result = &result .get(&dataset.id) - .expect("result contains entry for each dataset"); + .expect("result contains entry for each dataset") + .result; match (behavior, dataset.name.kind()) { ( TransientZoneRootBehavior::Succeed, DatasetKind::TransientZoneRoot | DatasetKind::TransientZone { .. }, - ) => assert_matches!(result.state, DatasetState::Ensured), + ) => assert_matches!(result, Ok(())), ( TransientZoneRootBehavior::Fail, DatasetKind::TransientZoneRoot, - ) => assert_matches!( - result.state, - DatasetState::FailedToEnsure(_) - ), + ) => assert_matches!(result, Err(_)), ( TransientZoneRootBehavior::Fail, DatasetKind::TransientZone { .. }, ) => assert_matches!( - &result.state, - DatasetState::FailedToEnsure(err) if matches!( + result, + Err(err) if matches!( **err, DatasetEnsureError::TransientZoneRootFailure { .. } ) @@ -1570,8 +1486,8 @@ mod tests { TransientZoneRootBehavior::Omit, DatasetKind::TransientZone { .. }, ) => assert_matches!( - &result.state, - DatasetState::FailedToEnsure(err) if matches!( + result, + Err(err) if matches!( **err, DatasetEnsureError::TransientZoneRootNoConfig(_) ) ), @@ -1649,11 +1565,11 @@ mod tests { // Our in-memory ZFS will return an error if we tried to mount a // transient zone before its parent zone root, so it's sufficient to // check that all the datasets ensured successfully. - assert_eq!(result.0.len(), datasets.len()); - for single_result in result.0 { + assert_eq!(result.len(), datasets.len()); + for single_result in result { assert_matches!( - single_result.state, - DatasetState::Ensured, + single_result.result, + Ok(()), "bad state for {:?}", single_result.config ); @@ -1712,13 +1628,13 @@ mod tests { .expect("no task error"); // Each dataset should have been ensured exactly once. 
- assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); { let zfs = zfs.inner.lock().unwrap(); for dataset in &datasets { assert_matches!( - result.0.get(&dataset.id).unwrap().state, - DatasetState::Ensured + result.get(&dataset.id).unwrap().result, + Ok(()) ); assert_eq!( zfs.ensure_call_counts.get(&dataset.name.full_name()), @@ -1733,13 +1649,13 @@ mod tests { .datasets_ensure(datasets.clone()) .await .expect("no task error"); - assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); { let zfs = zfs.inner.lock().unwrap(); for dataset in &datasets { assert_matches!( - result.0.get(&dataset.id).unwrap().state, - DatasetState::Ensured + result.get(&dataset.id).unwrap().result, + Ok(()) ); assert_eq!( zfs.ensure_call_counts.get(&dataset.name.full_name()), @@ -1783,13 +1699,13 @@ mod tests { .datasets_ensure(datasets.clone()) .await .expect("no task error"); - assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); { let zfs = zfs.inner.lock().unwrap(); for dataset in &datasets { assert_matches!( - result.0.get(&dataset.id).unwrap().state, - DatasetState::Ensured + result.get(&dataset.id).unwrap().result, + Ok(()) ); let expected_count = if mutated_datasets.contains(&dataset.id) { 2 } else { 1 }; @@ -1893,10 +1809,8 @@ mod tests { .datasets_ensure(datasets.clone()) .await .expect("no task error"); - assert_eq!(result.0.len(), datasets.len()); - assert!( - result.0.iter().all(|r| matches!(r.state, DatasetState::Ensured)) - ); + assert_eq!(result.len(), datasets.len()); + assert!(result.iter().all(|r| matches!(r.result, Ok(())))); // Try to ensure each of the nested datasets. This should succeed for // any where the debug dataset was mounted, and fail with an appropriate @@ -2260,14 +2174,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // Calling "datasets_ensure" with the same input should succeed. @@ -2275,14 +2188,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); harness.cleanup().await; @@ -2334,14 +2246,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // Creating the dataset should have mounted it @@ -2356,14 +2267,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // ... and doing so mounts the dataset again. 
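These tests exercise the new shape of the ensure results: `datasets_ensure` now returns a map of `DatasetEnsureResult { config, result }` entries keyed by dataset ID, rather than the old wrapper whose `.0` field held per-dataset `DatasetState` values. A minimal, self-contained sketch of consuming that shape follows; the types here are simplified stand-ins for illustration, not the crate's real definitions.

use std::collections::BTreeMap;
use std::sync::Arc;

// Simplified stand-ins for the crate's dataset-ensure types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct DatasetUuid(u128);

#[derive(Debug)]
struct DatasetEnsureError(String);

#[derive(Debug)]
struct DatasetEnsureResult {
    config: String, // stand-in for the full `DatasetConfig`
    result: Result<(), Arc<DatasetEnsureError>>,
}

/// Split ensure results into successfully-ensured dataset names and
/// (name, error message) pairs, mirroring the per-dataset checks above.
fn partition_results(
    results: &BTreeMap<DatasetUuid, DatasetEnsureResult>,
) -> (Vec<String>, Vec<(String, String)>) {
    let mut ensured = Vec::new();
    let mut failed = Vec::new();
    for entry in results.values() {
        match &entry.result {
            Ok(()) => ensured.push(entry.config.clone()),
            Err(err) => failed.push((entry.config.clone(), err.0.clone())),
        }
    }
    (ensured, failed)
}

Keying the map by dataset ID matches how the tests above index into the returned collection with `result.get(&dataset.id)`.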
@@ -2418,9 +2328,9 @@ mod illumos_tests { .datasets_ensure(dataset_configs.clone()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), dataset_configs.len()); - for result in &result.0 { - assert_matches!(result.state, DatasetState::Ensured); + assert_eq!(result.len(), dataset_configs.len()); + for result in &result { + assert_matches!(result.result, Ok(())); } // Creating the dataset should have mounted it @@ -2496,9 +2406,9 @@ mod illumos_tests { .datasets_ensure(datasets.clone()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), datasets.len()); - for result in &result.0 { - assert_matches!(result.state, DatasetState::Ensured); + assert_eq!(result.len(), datasets.len()); + for result in &result { + assert_matches!(result.result, Ok(())); } // Calling "datasets_ensure" with the same input should succeed. @@ -2506,9 +2416,9 @@ mod illumos_tests { .datasets_ensure(datasets.clone()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), datasets.len()); - for result in &result.0 { - assert_matches!(result.state, DatasetState::Ensured); + assert_eq!(result.len(), datasets.len()); + for result in &result { + assert_matches!(result.result, Ok(())); } harness.cleanup().await; @@ -2535,14 +2445,13 @@ mod illumos_tests { .datasets_ensure([debug_dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&debug_dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // Start querying the state of nested datasets. diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs index 7a3ae1ac0e8..e70fd7fab59 100644 --- a/sled-agent/config-reconciler/src/handle.rs +++ b/sled-agent/config-reconciler/src/handle.rs @@ -12,7 +12,6 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use omicron_common::disk::DatasetName; -use omicron_common::disk::DiskIdentity; use sled_agent_api::ArtifactConfig; use sled_storage::config::MountConfig; use sled_storage::disk::Disk; @@ -37,6 +36,7 @@ use sled_storage::dataset::U2_DEBUG_DATASET; use sled_storage::dataset::ZONE_DATASET; use crate::DatasetTaskError; +use crate::InternalDisksWithBootDisk; use crate::LedgerArtifactConfigError; use crate::LedgerNewConfigError; use crate::LedgerTaskError; @@ -277,7 +277,7 @@ impl ConfigReconcilerHandle { } /// Wait for the internal disks task to start managing the boot disk. - pub async fn wait_for_boot_disk(&mut self) -> Arc { + pub async fn wait_for_boot_disk(&mut self) -> InternalDisksWithBootDisk { self.internal_disks_rx.wait_for_boot_disk().await } @@ -343,16 +343,16 @@ impl ConfigReconcilerHandle { .await } - /// Collect inventory fields relevant to config reconciliation. - pub async fn inventory( + /// Return the currently-ledgered [`OmicronSledConfig`]. + /// + /// # Errors + /// + /// Fails if `spawn_reconciliation_task()` has not yet been called or if we + /// have not yet checked the internal disks for a ledgered config. 
+ pub fn ledgered_sled_config( &self, - log: &Logger, - ) -> Result { - let ledgered_sled_config = match self - .ledger_task - .get() - .map(LedgerTaskHandle::current_config) - { + ) -> Result, InventoryError> { + match self.ledger_task.get().map(LedgerTaskHandle::current_config) { // If we haven't yet spawned the ledger task, or we have but // it's still waiting on disks, we don't know whether we have a // ledgered sled config. It's not reasonable to report `None` in @@ -363,12 +363,19 @@ impl ConfigReconcilerHandle { // for the boot disk and spawn the reconciler task before starting // the dropshot server that allows Nexus to collect inventory. None | Some(CurrentSledConfig::WaitingForInternalDisks) => { - return Err(InventoryError::LedgerContentsNotAvailable); + Err(InventoryError::LedgerContentsNotAvailable) } - Some(CurrentSledConfig::WaitingForInitialConfig) => None, - Some(CurrentSledConfig::Ledgered(config)) => Some(config), - }; + Some(CurrentSledConfig::WaitingForInitialConfig) => Ok(None), + Some(CurrentSledConfig::Ledgered(config)) => Ok(Some(config)), + } + } + /// Collect inventory fields relevant to config reconciliation. + pub async fn inventory( + &self, + log: &Logger, + ) -> Result { + let ledgered_sled_config = self.ledgered_sled_config()?; let zpools = self.currently_managed_zpools_rx.to_inventory(log).await; let datasets = self @@ -468,7 +475,7 @@ impl AvailableDatasetsReceiver { AvailableDatasetsReceiverInner::FakeStatic(pools) => pools .iter() .map(|(pool, path)| PathInPool { - pool: ZpoolOrRamdisk::Zpool(pool.clone()), + pool: ZpoolOrRamdisk::Zpool(*pool), path: path.join(U2_DEBUG_DATASET), }) .collect(), @@ -491,7 +498,7 @@ impl AvailableDatasetsReceiver { AvailableDatasetsReceiverInner::FakeStatic(pools) => pools .iter() .map(|(pool, path)| PathInPool { - pool: ZpoolOrRamdisk::Zpool(pool.clone()), + pool: ZpoolOrRamdisk::Zpool(*pool), path: path.join(ZONE_DATASET), }) .collect(), diff --git a/sled-agent/config-reconciler/src/internal_disks.rs b/sled-agent/config-reconciler/src/internal_disks.rs index e9c05adbdb9..0d0102047d7 100644 --- a/sled-agent/config-reconciler/src/internal_disks.rs +++ b/sled-agent/config-reconciler/src/internal_disks.rs @@ -25,6 +25,7 @@ use sled_hardware::PooledDiskError; use sled_storage::config::MountConfig; use sled_storage::dataset::CLUSTER_DATASET; use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::dataset::INSTALL_DATASET; use sled_storage::dataset::M2_ARTIFACT_DATASET; use sled_storage::dataset::M2_DEBUG_DATASET; use sled_storage::disk::Disk; @@ -38,7 +39,6 @@ use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::future::Future; -use std::mem; use std::ops::Deref; use std::sync::Arc; use std::time::Duration; @@ -70,6 +70,8 @@ enum InternalDisksReceiverInner { impl InternalDisksReceiver { /// Create an `InternalDisksReceiver` that always reports a fixed set of /// disks. + /// + /// The first disk is set as the boot disk. 
#[cfg(any(test, feature = "testing"))] pub fn fake_static( mount_config: Arc, @@ -77,18 +79,27 @@ impl InternalDisksReceiver { ) -> Self { let inner = InternalDisksReceiverInner::FakeStatic(Arc::new( disks - .map(|(identity, zpool_name)| { - InternalDiskDetails::fake_details(identity, zpool_name) + .enumerate() + .map(|(i, (identity, zpool_name))| { + InternalDiskDetails::fake_details( + identity, + zpool_name, + i == 0, + ) }) .collect(), )); - // We never report errors from our static set; move the sender to a task - // that idles so we don't get recv errors. + // We never report errors from our static set. If there's a Tokio + // runtime, move the sender to a task that idles so we don't get recv + // errors; otherwise, don't bother because no one can await changes on + // `errors_rx` anyway (e.g., if we're being used from a non-tokio test). let (errors_tx, errors_rx) = watch::channel(Arc::default()); - tokio::spawn(async move { - errors_tx.closed().await; - }); + if tokio::runtime::Handle::try_current().is_ok() { + tokio::spawn(async move { + errors_tx.closed().await; + }); + } Self { mount_config, inner, errors_rx } } @@ -109,7 +120,7 @@ impl InternalDisksReceiver { .clone() .into_iter() .map(|(identity, zpool_name)| { - InternalDiskDetails::fake_details(identity, zpool_name) + InternalDiskDetails::fake_details(identity, zpool_name, false) }) .collect(); let (mapped_tx, mapped_rx) = watch::channel(Arc::new(current)); @@ -122,7 +133,9 @@ impl InternalDisksReceiver { .clone() .into_iter() .map(|(identity, zpool_name)| { - InternalDiskDetails::fake_details(identity, zpool_name) + InternalDiskDetails::fake_details( + identity, zpool_name, false, + ) }) .collect(); if mapped_tx.send(Arc::new(remapped)).is_err() { @@ -206,6 +219,20 @@ impl InternalDisksReceiver { InternalDisks { disks, mount_config: Arc::clone(&self.mount_config) } } + /// Get an [`InternalDisksWithBootDisk`], panicking if we have no boot + /// disk. + /// + /// This method is only available to tests; production code should instead + /// use `current()` and/or `wait_for_boot_disk()`. + #[cfg(any(test, feature = "testing"))] + pub fn current_with_boot_disk(&self) -> InternalDisksWithBootDisk { + let disks = self.current(); + InternalDisksWithBootDisk::new(disks).expect( + "current_with_boot_disk() should be called by \ + tests that set up a fake boot disk", + ) + } + /// Get the current set of managed internal disks and mark the returned /// value as seen. /// @@ -264,40 +291,17 @@ impl InternalDisksReceiver { /// Wait until the boot disk is managed, returning its identity. /// /// Internally updates the most-recently-seen value. 
- pub(crate) async fn wait_for_boot_disk(&mut self) -> Arc { - match &mut self.inner { - InternalDisksReceiverInner::Real(disks_rx) => loop { - let disks = disks_rx.borrow_and_update(); - if let Some(disk) = disks.iter().find(|d| d.is_boot_disk()) { - return Arc::clone(&disk.identity); - } - mem::drop(disks); - - disks_rx - .changed() - .await - .expect("InternalDisks task never dies"); - }, - #[cfg(any(test, feature = "testing"))] - InternalDisksReceiverInner::FakeStatic(disks) => { - if let Some(disk) = disks.iter().find(|d| d.is_boot_disk()) { - return Arc::clone(&disk.id.identity); - } - panic!("fake InternalDisksReceiver has no boot disk") - } - #[cfg(any(test, feature = "testing"))] - InternalDisksReceiverInner::FakeDynamic(disks_rx) => loop { - let disks = disks_rx.borrow_and_update(); - if let Some(disk) = disks.iter().find(|d| d.is_boot_disk()) { - return Arc::clone(&disk.id.identity); - } - mem::drop(disks); - - disks_rx - .changed() - .await - .expect("InternalDisks task never dies"); - }, + pub(crate) async fn wait_for_boot_disk( + &mut self, + ) -> InternalDisksWithBootDisk { + loop { + let current = self.current_and_update(); + if let Some(with_boot_disk) = + InternalDisksWithBootDisk::new(current) + { + return with_boot_disk; + }; + self.changed().await.expect("InternalDisks task never dies"); } } @@ -321,9 +325,15 @@ impl InternalDisks { &self.mount_config } - pub fn boot_disk_zpool(&self) -> Option<&ZpoolName> { + pub fn boot_disk_zpool(&self) -> Option { self.disks.iter().find_map(|d| { - if d.is_boot_disk() { Some(&d.zpool_name) } else { None } + if d.is_boot_disk() { Some(d.zpool_name) } else { None } + }) + } + + pub fn boot_disk_install_dataset(&self) -> Option { + self.boot_disk_zpool().map(|zpool| { + zpool.dataset_mountpoint(&self.mount_config.root, INSTALL_DATASET) }) } @@ -393,6 +403,58 @@ impl InternalDisks { } } +/// An [`InternalDisks`] with a guaranteed-present boot disk. +pub struct InternalDisksWithBootDisk { + inner: InternalDisks, + boot_disk: InternalDiskDetailsId, +} + +impl InternalDisksWithBootDisk { + fn new(inner: InternalDisks) -> Option { + let boot_disk = inner + .disks + .iter() + .find_map(|d| if d.is_boot_disk() { Some(d.id()) } else { None })?; + Some(Self { inner, boot_disk }) + } + + fn boot_disk(&self) -> &InternalDiskDetails { + match self.inner.disks.get(&self.boot_disk) { + Some(details) => details, + None => unreachable!("boot disk present by construction"), + } + } + + pub fn boot_disk_id(&self) -> &Arc { + &self.boot_disk().id.identity + } + + pub fn boot_disk_zpool(&self) -> ZpoolName { + self.boot_disk().zpool_name + } + + pub fn boot_disk_install_dataset(&self) -> Utf8PathBuf { + self.boot_disk_zpool().dataset_mountpoint( + &self.inner.mount_config().root, + INSTALL_DATASET, + ) + } + + pub fn non_boot_disk_install_datasets( + &self, + ) -> impl Iterator + '_ { + self.inner.disks.iter().filter(|disk| disk.id != self.boot_disk).map( + |disk| { + let dataset = disk.zpool_name.dataset_mountpoint( + &self.inner.mount_config.root, + INSTALL_DATASET, + ); + (disk.zpool_name, dataset) + }, + ) + } +} + // A subset of `Disk` properties. We store this in `InternalDisks` instead of // `Disk`s both to avoid exposing raw `Disk`s outside this crate and to support // easier faking for tests. 
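With this change, callers that previously got back only the boot disk's `Arc<DiskIdentity>` now receive an `InternalDisksWithBootDisk`, so the boot disk's zpool and install dataset can be read without rechecking that a boot disk is present. A rough usage sketch; the function, handle variable, logging, and the `sled_agent_config_reconciler` import path are illustrative assumptions, while the methods shown (`wait_for_boot_disk`, `boot_disk_zpool`, `boot_disk_install_dataset`, `non_boot_disk_install_datasets`) are the ones added in this diff.

// Illustrative sketch only: assumes the reconciler handle is available as
// `handle` and that the crate is imported as `sled_agent_config_reconciler`.
use sled_agent_config_reconciler::ConfigReconcilerHandle;
use slog::Logger;

async fn report_install_datasets(
    handle: &mut ConfigReconcilerHandle,
    log: &Logger,
) {
    // Blocks until the internal-disks task reports the boot disk as managed.
    let disks = handle.wait_for_boot_disk().await;

    slog::info!(
        log, "boot disk managed";
        "boot_zpool" => %disks.boot_disk_zpool(),
        "install_dataset" => %disks.boot_disk_install_dataset()
    );

    // Non-boot M.2 install datasets remain reachable for fallback lookups.
    for (zpool, dataset) in disks.non_boot_disk_install_datasets() {
        slog::info!(
            log, "non-boot install dataset";
            "zpool" => %zpool, "path" => %dataset
        );
    }
}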
@@ -452,15 +514,19 @@ impl From<&'_ InternalDisk> for InternalDiskDetails { impl InternalDiskDetails { #[cfg(any(test, feature = "testing"))] - fn fake_details(identity: DiskIdentity, zpool_name: ZpoolName) -> Self { - // We can expand the interface for fake disks if we need to be able to - // specify more of these properties in future tests. + fn fake_details( + identity: DiskIdentity, + zpool_name: ZpoolName, + is_boot_disk: bool, + ) -> Self { Self { id: InternalDiskDetailsId { identity: Arc::new(identity), - is_boot_disk: false, + is_boot_disk, }, zpool_name, + // We can expand the interface for fake disks if we need to be able + // to specify more of these properties in future tests. slot: None, raw_devfs_path: None, } diff --git a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs index 7bd3e9b7c6f..587303397ad 100644 --- a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs +++ b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs @@ -218,6 +218,7 @@ fn merge_old_configs( /// Legacy type of the ledgered zone config. #[derive(Debug, Clone, Deserialize, Serialize)] +#[cfg_attr(test, derive(schemars::JsonSchema))] struct OmicronZonesConfigLocal { omicron_generation: Generation, ledger_generation: Generation, @@ -237,9 +238,11 @@ impl Ledgerable for OmicronZonesConfigLocal { } #[derive(Debug, Clone, Deserialize, Serialize)] +#[cfg_attr(test, derive(schemars::JsonSchema))] struct OmicronZoneConfigLocal { zone: OmicronZoneConfig, #[serde(rename = "root")] + #[cfg_attr(test, schemars(with = "String"))] _root: Utf8PathBuf, } @@ -264,6 +267,15 @@ pub(super) mod tests { const MERGED_CONFIG_PATH: &str = "test-data/expectorate/merged-sled-config.json"; + #[test] + fn test_old_config_schema() { + let schema = schemars::schema_for!(OmicronZonesConfigLocal); + expectorate::assert_contents( + "../../schema/all-zones-requests.json", + &serde_json::to_string_pretty(&schema).unwrap(), + ); + } + #[test] fn test_merge_old_configs() { let disks: OmicronPhysicalDisksConfig = { diff --git a/sled-agent/config-reconciler/src/lib.rs b/sled-agent/config-reconciler/src/lib.rs index eaa1c6f5f12..24a3e7b4489 100644 --- a/sled-agent/config-reconciler/src/lib.rs +++ b/sled-agent/config-reconciler/src/lib.rs @@ -76,6 +76,7 @@ pub use handle::ReconcilerInventory; pub use handle::TimeSyncConfig; pub use internal_disks::InternalDisks; pub use internal_disks::InternalDisksReceiver; +pub use internal_disks::InternalDisksWithBootDisk; pub use ledger::LedgerArtifactConfigError; pub use ledger::LedgerNewConfigError; pub use ledger::LedgerTaskError; diff --git a/sled-agent/config-reconciler/src/reconciler_task.rs b/sled-agent/config-reconciler/src/reconciler_task.rs index e09c586e142..95c782b47a7 100644 --- a/sled-agent/config-reconciler/src/reconciler_task.rs +++ b/sled-agent/config-reconciler/src/reconciler_task.rs @@ -9,19 +9,23 @@ use chrono::Utc; use either::Either; use futures::future; use illumos_utils::zpool::PathInPool; +use illumos_utils::zpool::ZpoolOrRamdisk; use key_manager::StorageKeyRequester; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use 
sled_storage::config::MountConfig; +use sled_storage::dataset::U2_DEBUG_DATASET; +use sled_storage::dataset::ZONE_DATASET; use sled_storage::disk::Disk; use slog::Logger; use slog::info; use slog::warn; -use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::HashSet; use std::sync::Arc; @@ -30,15 +34,16 @@ use std::time::Instant; use tokio::sync::watch; use crate::TimeSyncConfig; -use crate::dataset_serialization_task::DatasetEnsureResult; use crate::dataset_serialization_task::DatasetTaskHandle; use crate::ledger::CurrentSledConfig; use crate::raw_disks::RawDisksReceiver; use crate::sled_agent_facilities::SledAgentFacilities; +mod datasets; mod external_disks; mod zones; +use self::datasets::OmicronDatasets; use self::external_disks::ExternalDisks; use self::zones::OmicronZones; @@ -66,17 +71,18 @@ pub(crate) fn spawn( currently_managed_zpools_tx, external_disks_tx, ); + let datasets = OmicronDatasets::new(dataset_task); let zones = OmicronZones::new(mount_config, time_sync_config); tokio::spawn( ReconcilerTask { key_requester, - dataset_task, current_config_rx, reconciler_result_tx, raw_disks_rx, external_disks, + datasets, zones, log, } @@ -115,8 +121,7 @@ impl ReconcilerResult { }; Either::Right( latest_result - .datasets - .all_mounted_debug_datasets(&self.mount_config), + .all_mounted_datasets(&self.mount_config, DatasetKind::Debug), ) } @@ -126,11 +131,10 @@ impl ReconcilerResult { let Some(latest_result) = &self.latest_result else { return Either::Left(std::iter::empty()); }; - Either::Right( - latest_result - .datasets - .all_mounted_zone_root_datasets(&self.mount_config), - ) + Either::Right(latest_result.all_mounted_datasets( + &self.mount_config, + DatasetKind::TransientZoneRoot, + )) } pub(crate) fn to_inventory( @@ -192,7 +196,7 @@ struct LatestReconciliationResult { sled_config: OmicronSledConfig, external_disks_inventory: BTreeMap, - datasets: DatasetEnsureResult, + datasets: BTreeMap, zones_inventory: BTreeMap, timesync_status: TimeSyncStatus, } @@ -202,19 +206,54 @@ impl LatestReconciliationResult { ConfigReconcilerInventory { last_reconciled_config: self.sled_config.clone(), external_disks: self.external_disks_inventory.clone(), - datasets: self.datasets.to_inventory(), + datasets: self.datasets.clone(), zones: self.zones_inventory.clone(), } } + + fn all_mounted_datasets<'a>( + &'a self, + mount_config: &'a MountConfig, + kind: DatasetKind, + ) -> impl Iterator + 'a { + // This is a private method only called by this file; we only have to + // handle the specific `DatasetKind`s used by our callers. + let mountpoint = match &kind { + DatasetKind::Debug => U2_DEBUG_DATASET, + DatasetKind::TransientZoneRoot => ZONE_DATASET, + _ => unreachable!( + "private function called with unexpected kind {kind:?}" + ), + }; + self.datasets + .iter() + // Filter down to successfully-ensured datasets + .filter_map(|(dataset_id, result)| match result { + ConfigReconcilerInventoryResult::Ok => { + self.sled_config.datasets.get(dataset_id) + } + ConfigReconcilerInventoryResult::Err { .. 
} => None, + }) + // Filter down to matching dataset kinds + .filter(move |config| *config.name.kind() == kind) + .map(|config| { + let pool = *config.name.pool(); + PathInPool { + pool: ZpoolOrRamdisk::Zpool(pool), + path: pool + .dataset_mountpoint(&mount_config.root, mountpoint), + } + }) + } } struct ReconcilerTask { key_requester: StorageKeyRequester, - dataset_task: DatasetTaskHandle, current_config_rx: watch::Receiver, reconciler_result_tx: watch::Sender, raw_disks_rx: RawDisksReceiver, external_disks: ExternalDisks, + datasets: OmicronDatasets, zones: OmicronZones, log: Logger, } @@ -369,10 +408,12 @@ impl ReconcilerTask { ) .await; - // Next, delete datasets that need to be deleted. + // Next, remove datasets we have but that aren't present in the config. // - // TODO We don't do this yet: + // Note: this doesn't actually delete them yet! // https://github.com/oxidecomputer/omicron/issues/6177 + self.datasets + .remove_datasets_if_needed(&sled_config.datasets, &self.log); // Finally, remove any external disks we're no longer supposed to use // (either due to config changes or the raw disk being gone). @@ -398,30 +439,9 @@ impl ReconcilerTask { .await; // Ensure all the datasets we want exist. - let datasets = match self - .dataset_task - .datasets_ensure(sled_config.datasets.clone()) - .await - { - Ok(result) => result, - Err(err) => { - warn!( - self.log, "failed to contact dataset task"; - InlineErrorChain::new(&err), - ); - // If we can't contact the dataset task, reuse the result from - // our previous attempt. This should still be correct (until we - // start deleting datasets, at which point we'll need a more - // holistic tracker for dataset status like we already have for - // disks and zones). - self.reconciler_result_tx - .borrow() - .latest_result - .as_ref() - .map(|inner| inner.datasets.clone()) - .unwrap_or_else(DatasetEnsureResult::default) - } - }; + self.datasets + .ensure_datasets_if_needed(sled_config.datasets.clone(), &self.log) + .await; // Collect the current timesync status (needed to start any new zones, // and also we want to report it as part of each reconciler result). @@ -438,14 +458,12 @@ impl ReconcilerTask { // the old instance. match zone_shutdown_result { Ok(()) => { - let currently_managed_zpools = - self.external_disks.currently_managed_zpools(); self.zones .start_zones_if_needed( &sled_config.zones, sled_agent_facilities, timesync_status.is_synchronized(), - ¤tly_managed_zpools, + &self.datasets, &self.log, ) .await; @@ -465,7 +483,7 @@ impl ReconcilerTask { let result = if !timesync_status.is_synchronized() || self.external_disks.has_retryable_error() || self.zones.has_retryable_error() - || datasets.has_retryable_error() + || self.datasets.has_retryable_error() { ReconciliationResult::ShouldRetry } else { @@ -475,7 +493,7 @@ impl ReconcilerTask { let inner = LatestReconciliationResult { sled_config, external_disks_inventory: self.external_disks.to_inventory(), - datasets, + datasets: self.datasets.to_inventory(), zones_inventory: self.zones.to_inventory(), timesync_status, }; diff --git a/sled-agent/config-reconciler/src/reconciler_task/datasets.rs b/sled-agent/config-reconciler/src/reconciler_task/datasets.rs new file mode 100644 index 00000000000..5232d67a1af --- /dev/null +++ b/sled-agent/config-reconciler/src/reconciler_task/datasets.rs @@ -0,0 +1,247 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Module for Omicron datasets. +//! +//! This module does not spawn a separate tokio task: our parent reconciler task +//! owns an [`OmicronDatasets`] and is able to mutate it in place during +//! reconciliation. However, we do need a [`DatasetTaskHandle`] to perform some +//! operations. This handle is shared with other "needs to perform dataset +//! operations" consumers (e.g., inventory requests perform operations to check +//! the live state of datasets directly from ZFS). + +use crate::dataset_serialization_task::DatasetEnsureError; +use crate::dataset_serialization_task::DatasetEnsureResult; +use crate::dataset_serialization_task::DatasetTaskHandle; +use id_map::IdMap; +use id_map::IdMappable; +use illumos_utils::zpool::PathInPool; +use illumos_utils::zpool::ZpoolOrRamdisk; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; +use nexus_sled_agent_shared::inventory::OmicronZoneConfig; +use omicron_common::disk::DatasetConfig; +use omicron_common::disk::DatasetKind; +use omicron_common::disk::DatasetName; +use omicron_uuid_kinds::DatasetUuid; +use sled_storage::config::MountConfig; +use sled_storage::dataset::ZONE_DATASET; +use slog::Logger; +use slog::warn; +use slog_error_chain::InlineErrorChain; +use std::collections::BTreeMap; +use std::sync::Arc; + +#[derive(Debug, thiserror::Error)] +pub(super) enum ZoneDatasetDependencyError { + #[error("zone config is missing a filesystem pool")] + MissingFilesystemPool, + #[error( + "zone's transient root dataset is not available: {}", .0.full_name(), + )] + TransientZoneDatasetNotAvailable(DatasetName), + #[error("zone's durable dataset is not available: {}", .0.full_name())] + DurableDatasetNotAvailable(DatasetName), +} + +#[derive(Debug)] +pub(super) struct OmicronDatasets { + datasets: IdMap, + dataset_task: DatasetTaskHandle, +} + +impl OmicronDatasets { + #[cfg(any(test, feature = "testing"))] + pub(super) fn with_datasets(datasets: I) -> Self + where + I: Iterator)>, + { + let dataset_task = DatasetTaskHandle::spawn_noop(); + let datasets = datasets + .map(|(config, result)| OmicronDataset { + config, + state: match result { + Ok(()) => DatasetState::Ensured, + Err(err) => DatasetState::FailedToEnsure(Arc::new(err)), + }, + }) + .collect(); + Self { datasets, dataset_task } + } + + pub(super) fn new(dataset_task: DatasetTaskHandle) -> Self { + Self { datasets: IdMap::default(), dataset_task } + } + + /// Confirm that any dataset dependencies of `zone` have been ensured + /// successfully, returning the path for the zone's filesystem root. + pub(super) fn validate_zone_storage( + &self, + zone: &OmicronZoneConfig, + mount_config: &MountConfig, + ) -> Result { + // Confirm that there's an ensured `TransientZoneRoot` dataset on this + // zone's filesystem pool. + let Some(filesystem_pool) = zone.filesystem_pool else { + // This should never happen: Reconfigurator guarantees all zones + // have filesystem pools. 
`filesystem_pool` is non-optional in + // blueprints; we should make it non-optional in `OmicronZoneConfig` + // too: https://github.com/oxidecomputer/omicron/issues/8216 + return Err(ZoneDatasetDependencyError::MissingFilesystemPool); + }; + + let transient_dataset_name = DatasetName::new( + filesystem_pool, + DatasetKind::TransientZone { name: zone.zone_name() }, + ); + + // TODO-cleanup It would be nicer if the zone included the filesystem + // dataset ID, so we could just do a lookup here instead of searching. + // https://github.com/oxidecomputer/omicron/issues/7214 + if !self.datasets.iter().any(|d| { + matches!(d.state, DatasetState::Ensured) + && d.config.name == transient_dataset_name + }) { + return Err( + ZoneDatasetDependencyError::TransientZoneDatasetNotAvailable( + transient_dataset_name, + ), + ); + } + + let zone_root_path = PathInPool { + pool: ZpoolOrRamdisk::Zpool(filesystem_pool), + // TODO-cleanup Should we get this path from the dataset we found + // above? + path: filesystem_pool + .dataset_mountpoint(&mount_config.root, ZONE_DATASET), + }; + + // Confirm that the durable dataset for this zone has been ensured, if + // it has one. + let Some(durable_dataset) = zone.dataset_name() else { + return Ok(zone_root_path); + }; + + // TODO-cleanup As above, if we had an ID we could look up instead of + // searching. + if !self.datasets.iter().any(|d| { + matches!(d.state, DatasetState::Ensured) + && d.config.name == durable_dataset + }) { + return Err( + ZoneDatasetDependencyError::DurableDatasetNotAvailable( + durable_dataset, + ), + ); + } + + Ok(zone_root_path) + } + + pub(super) fn remove_datasets_if_needed( + &mut self, + datasets: &IdMap, + log: &Logger, + ) { + let mut datasets_to_remove = Vec::new(); + + for dataset in &self.datasets { + if !datasets.contains_key(&dataset.config.id) { + datasets_to_remove.push(dataset.config.id); + } + } + + for dataset_id in datasets_to_remove { + // TODO We should delete these datasets! (We should also delete any + // on-disk Omicron datasets that aren't present in `config`). + // + // https://github.com/oxidecomputer/omicron/issues/6177 + let dataset = self.datasets.remove(&dataset_id).expect( + "datasets_to_remove only has existing datasets by construction", + ); + warn!( + log, "leaking ZFS dataset (should be deleted: omicron#6177)"; + "id" => %dataset_id, + "name" => dataset.config.name.full_name(), + ) + } + } + + pub(super) async fn ensure_datasets_if_needed( + &mut self, + datasets: IdMap, + log: &Logger, + ) { + let results = match self.dataset_task.datasets_ensure(datasets).await { + Ok(results) => results, + Err(err) => { + // If we can't contact the dataset task, we leave + // `self.datasets` untouched (i.e., reuse whatever datasets we + // have from the last time we successfully contacted the dataset + // task). 
+ warn!( + log, "failed to contact dataset task"; + InlineErrorChain::new(&err), + ); + return; + } + }; + + for DatasetEnsureResult { config, result } in results { + let state = match result { + Ok(()) => DatasetState::Ensured, + Err(err) => DatasetState::FailedToEnsure(err), + }; + self.datasets.insert(OmicronDataset { config, state }); + } + } + + pub(super) fn has_retryable_error(&self) -> bool { + self.datasets.iter().any(|d| match &d.state { + DatasetState::Ensured => false, + DatasetState::FailedToEnsure(err) => err.is_retryable(), + }) + } + + pub(crate) fn to_inventory( + &self, + ) -> BTreeMap { + self.datasets + .iter() + .map(|dataset| { + let result = match &dataset.state { + DatasetState::Ensured => { + ConfigReconcilerInventoryResult::Ok + } + DatasetState::FailedToEnsure(err) => { + ConfigReconcilerInventoryResult::Err { + message: InlineErrorChain::new(err).to_string(), + } + } + }; + (dataset.config.id, result) + }) + .collect() + } +} + +#[derive(Debug)] +struct OmicronDataset { + config: DatasetConfig, + state: DatasetState, +} + +impl IdMappable for OmicronDataset { + type Id = DatasetUuid; + + fn id(&self) -> Self::Id { + self.config.id + } +} + +#[derive(Debug)] +enum DatasetState { + Ensured, + FailedToEnsure(Arc), +} diff --git a/sled-agent/config-reconciler/src/reconciler_task/zones.rs b/sled-agent/config-reconciler/src/reconciler_task/zones.rs index 0bf14582d76..e0a6e370445 100644 --- a/sled-agent/config-reconciler/src/reconciler_task/zones.rs +++ b/sled-agent/config-reconciler/src/reconciler_task/zones.rs @@ -32,6 +32,7 @@ use slog::Logger; use slog::info; use slog::warn; use slog_error_chain::InlineErrorChain; +use std::borrow::Cow; use std::collections::BTreeMap; use std::net::IpAddr; use std::net::Ipv6Addr; @@ -39,7 +40,8 @@ use std::num::NonZeroUsize; use std::str::FromStr as _; use std::sync::Arc; -use super::CurrentlyManagedZpools; +use super::OmicronDatasets; +use super::datasets::ZoneDatasetDependencyError; #[derive(Debug, Clone)] pub enum TimeSyncStatus { @@ -246,7 +248,30 @@ impl OmicronZones { desired_zones: &IdMap, sled_agent_facilities: &T, is_time_synchronized: bool, - all_u2_pools: &CurrentlyManagedZpools, + datasets: &OmicronDatasets, + log: &Logger, + ) { + self.start_zones_if_needed_impl( + desired_zones, + sled_agent_facilities, + &RealZoneFacilities, + is_time_synchronized, + datasets, + log, + ) + .await + } + + async fn start_zones_if_needed_impl< + T: SledAgentFacilities, + U: ZoneFacilities, + >( + &mut self, + desired_zones: &IdMap, + sled_agent_facilities: &T, + zone_facilities: &U, + is_time_synchronized: bool, + datasets: &OmicronDatasets, log: &Logger, ) { // Filter desired zones down to just those that we need to start. See @@ -293,34 +318,132 @@ impl OmicronZones { }); // Build up the futures for starting each zone. - let all_u2_pools = all_u2_pools.clone().into_vec(); let start_futures = zones_to_start.map(|zone| { - sled_agent_facilities - .start_omicron_zone( - zone, - &self.mount_config, - is_time_synchronized, - &all_u2_pools, - ) - .map(move |result| { - ( - zone.clone(), - result.map_err(ZoneStartError::SledAgentStartFailed), - ) - }) + self.start_single_zone( + zone, + sled_agent_facilities, + zone_facilities, + is_time_synchronized, + datasets, + log, + ) + .map(move |result| (zone.clone(), result)) }); // Concurrently start all zones, then record the results. 
let start_results = future::join_all(start_futures).await; for (config, result) in start_results { let state = match result { - Ok(running_zone) => ZoneState::Running(Arc::new(running_zone)), + Ok(state) => state, Err(err) => ZoneState::FailedToStart(err), }; self.zones.insert(OmicronZone { config, state }); } } + async fn start_single_zone( + &self, + zone: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + is_time_synchronized: bool, + datasets: &OmicronDatasets, + log: &Logger, + ) -> Result { + // Ensure no zone by this name exists. This should only happen in the + // event of a sled-agent restart, in which case all the zones the + // previous sled-agent process had started are still running. + if let Some(state) = self + .ensure_removed_before_starting( + zone, + sled_agent_facilities, + zone_facilities, + log, + ) + .await? + { + return Ok(state); + } + + // Ensure that time is sync'd, if needed by this zone. + if zone.zone_type.requires_timesync() && !is_time_synchronized { + return Err(ZoneStartError::TimeNotSynchronized); + } + + // Ensure all dataset dependencies of this zone are okay. + let zone_root_path = + datasets.validate_zone_storage(zone, &self.mount_config)?; + + // The zone is not running - start it. + match sled_agent_facilities + .start_omicron_zone(zone, zone_root_path) + .await + { + Ok(running_zone) => Ok(ZoneState::Running(Arc::new(running_zone))), + Err(err) => Err(ZoneStartError::SledAgentStartFailed(err)), + } + } + + // The return type of this function is strange. The possible values are: + // + // * `Ok(None)` - the zone is not running + // * `Err(_)` - we had an error related to zone startup + // * `Ok(Some(state))` - the zone is still running and is in some state that + // we need to do more work to handle (e.g., we found a running zone but + // failed to shut it down cleanly, in which case we'll return + // `Ok(Some(ZoneState::PartiallyShutDown { .. }))`). In this case, our + // caller should do no further work to try to start `zone`, and should + // instead bubble the `state` up to be recorded. + async fn ensure_removed_before_starting< + T: SledAgentFacilities, + U: ZoneFacilities, + >( + &self, + zone: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + log: &Logger, + ) -> Result, ZoneStartError> { + let zone_name = ZoneName::new(zone); + + // If no zone by this name exists, there's nothing to remove. + if !zone_facilities.zone_with_name_exists(&zone_name).await? { + return Ok(None); + } + + // NOTE: We might want to tell the sled-agent's metrics task to stop + // tracking any links in this zone. However, we don't have very easy + // access to them, without running a command in the zone. These links + // are about to be deleted, and the metrics task will expire them after + // a while anyway, but it might be worth the trouble to do that in the + // future. + // + // Skipping that for now, follow the normal zone shutdown process + // _after_ metrics (i.e., shut down and clean up the zone). + // + // TODO-correctness There's a (very unlikely?) chance that this cleanup + // isn't right: if the running zone (which we have no active knowledge + // of) was started with a different `OmicronZoneConfig`, the cleanup + // steps we do here might not be right. 
+ match resume_shutdown_from_stop( + zone, + sled_agent_facilities, + zone_facilities, + &zone_name, + log, + ) + .await + { + Ok(()) => Ok(None), + Err((state, err)) => { + // We didn't fail to _start_ the zone, so it doesn't make sense + // to return a `ZoneStartError`, but the zone is in a state that + // we need to remember. + Ok(Some(ZoneState::PartiallyShutDown { state, err })) + } + } + } + /// Check the timesync status from a running NTP zone (if it exists) pub(super) async fn check_timesync(&self) -> TimeSyncStatus { match &self.timesync_config { @@ -486,7 +609,8 @@ impl OmicronZone { state: PartiallyShutDownState::FailedToStop(running_zone), .. } => { - self.resume_shutdown_from_stop( + resume_shutdown_from_stop( + &self.config, sled_agent_facilities, zone_facilities, running_zone, @@ -498,14 +622,24 @@ impl OmicronZone { state: PartiallyShutDownState::FailedToCleanUp, .. } => { - self.resume_shutdown_from_cleanup( + resume_shutdown_from_cleanup( + &self.config, sled_agent_facilities, zone_facilities, &log, ) .await } - ZoneState::FailedToStart(_) => { + // With these errors, we never even tried to start the zone, so + // there's no cleanup required: we can just return. + ZoneState::FailedToStart(ZoneStartError::TimeNotSynchronized) + | ZoneState::FailedToStart(ZoneStartError::CheckZoneExists(_)) + | ZoneState::FailedToStart(ZoneStartError::DatasetDependency(_)) => { + Ok(()) + } + ZoneState::FailedToStart(ZoneStartError::SledAgentStartFailed( + err, + )) => { // TODO-correctness What do we need to do to try to shut down a // zone that we tried to start? We need fine-grained status of // what startup things succeeded that need to be cleaned up. For @@ -514,7 +648,8 @@ impl OmicronZone { log, "need to shut down zone that failed to start, but this \ is currently unimplemented: assuming no cleanup work \ - required" + required"; + "start-err" => InlineErrorChain::new(err.as_ref()), ); Ok(()) } @@ -548,83 +683,83 @@ impl OmicronZone { )); } - self.resume_shutdown_from_stop( + resume_shutdown_from_stop( + &self.config, sled_agent_facilities, zone_facilities, - running_zone, + &ZoneName::from(running_zone.name()), log, ) .await } +} - async fn resume_shutdown_from_stop< - T: SledAgentFacilities, - U: ZoneFacilities, - >( - &self, - sled_agent_facilities: &T, - zone_facilities: &U, - running_zone: &Arc, - log: &Logger, - ) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { - if let Err(err) = zone_facilities.halt_zone(running_zone, log).await { +async fn resume_shutdown_from_stop< + T: SledAgentFacilities, + U: ZoneFacilities, +>( + config: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + zone_name: &ZoneName<'_>, + log: &Logger, +) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { + if let Err(err) = zone_facilities.halt_zone(zone_name, log).await { + warn!( + log, + "Failed to stop running zone"; + InlineErrorChain::new(&err), + ); + return Err(( + PartiallyShutDownState::FailedToStop(zone_name.to_static()), + err, + )); + } + + resume_shutdown_from_cleanup( + config, + sled_agent_facilities, + zone_facilities, + log, + ) + .await +} + +async fn resume_shutdown_from_cleanup< + T: SledAgentFacilities, + U: ZoneFacilities, +>( + config: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + log: &Logger, +) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { + // Special teardown for internal DNS zones: delete the global zone + // address we created for it, and tell DDM to stop advertising the + // prefix of that 
address. + if let OmicronZoneType::InternalDns { + gz_address, gz_address_index, .. + } = &config.zone_type + { + let addrobj = AddrObject::new( + &sled_agent_facilities.underlay_vnic().0, + &internal_dns_addrobj_name(*gz_address_index), + ) + .expect("internal DNS address object name is well-formed"); + if let Err(err) = zone_facilities.delete_gz_address(addrobj).await { warn!( log, - "Failed to stop running zone"; + "Failed to delete internal-dns gz address"; InlineErrorChain::new(&err), ); - return Err(( - PartiallyShutDownState::FailedToStop(Arc::clone(running_zone)), - err, - )); + return Err((PartiallyShutDownState::FailedToCleanUp, err)); } - self.resume_shutdown_from_cleanup( - sled_agent_facilities, - zone_facilities, - log, - ) - .await + sled_agent_facilities + .ddm_remove_internal_dns_prefix(Ipv6Subnet::new(*gz_address)); } - async fn resume_shutdown_from_cleanup< - T: SledAgentFacilities, - U: ZoneFacilities, - >( - &self, - sled_agent_facilities: &T, - zone_facilities: &U, - log: &Logger, - ) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { - // Special teardown for internal DNS zones: delete the global zone - // address we created for it, and tell DDM to stop advertising the - // prefix of that address. - if let OmicronZoneType::InternalDns { - gz_address, - gz_address_index, - .. - } = &self.config.zone_type - { - let addrobj = AddrObject::new( - &sled_agent_facilities.underlay_vnic().0, - &internal_dns_addrobj_name(*gz_address_index), - ) - .expect("internal DNS address object name is well-formed"); - if let Err(err) = zone_facilities.delete_gz_address(addrobj).await { - warn!( - log, - "Failed to delete internal-dns gz address"; - InlineErrorChain::new(&err), - ); - return Err((PartiallyShutDownState::FailedToCleanUp, err)); - } - - sled_agent_facilities - .ddm_remove_internal_dns_prefix(Ipv6Subnet::new(*gz_address)); - } - - Ok(()) - } + Ok(()) } fn internal_dns_addrobj_name(gz_address_index: u32) -> String { @@ -688,19 +823,64 @@ enum ZoneState { FailedToStart(ZoneStartError), } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct ZoneName<'a>(Cow<'a, str>); + +impl<'a> From<&'a str> for ZoneName<'a> { + fn from(value: &'a str) -> Self { + Self(Cow::Borrowed(value)) + } +} + +impl From for ZoneName<'static> { + fn from(value: String) -> Self { + Self(Cow::Owned(value)) + } +} + +impl ZoneName<'_> { + fn new(config: &OmicronZoneConfig) -> Self { + Self(Cow::Owned(config.zone_name())) + } + + fn to_string(&self) -> String { + self.0.clone().into_owned() + } + + fn to_static(&self) -> ZoneName<'static> { + ZoneName(Cow::Owned(self.0.clone().into_owned())) + } +} + #[derive(Debug)] enum PartiallyShutDownState { FailedToUntrackMetrics(Arc), - FailedToStop(Arc), + FailedToStop(ZoneName<'static>), FailedToCleanUp, } #[derive(Debug, thiserror::Error)] enum ZoneStartError { + #[error("could not determine whether zone already exists")] + CheckZoneExists(#[from] CheckZoneExistsError), + #[error("Time not yet synchronized")] + TimeNotSynchronized, + #[error(transparent)] + DatasetDependency(#[from] ZoneDatasetDependencyError), #[error("sled agent failed to start service")] SledAgentStartFailed(#[source] anyhow::Error), } +#[derive(Debug, thiserror::Error)] +enum CheckZoneExistsError { + #[error("failed to find zone {name}")] + FindByName { + name: String, + #[source] + err: AdmError, + }, +} + #[derive(Debug, thiserror::Error)] enum ZoneShutdownError { #[error("failed to untrack metrics")] @@ -712,9 +892,14 @@ enum ZoneShutdownError { } trait ZoneFacilities 
{ + async fn zone_with_name_exists( + &self, + name: &ZoneName<'_>, + ) -> Result; + async fn halt_zone( &self, - zone: &RunningZone, + zone: &ZoneName, log: &Logger, ) -> Result<(), ZoneShutdownError>; @@ -727,20 +912,32 @@ trait ZoneFacilities { struct RealZoneFacilities; impl ZoneFacilities for RealZoneFacilities { + async fn zone_with_name_exists( + &self, + name: &ZoneName<'_>, + ) -> Result { + match Zones::real_api().find(&name.0).await { + Ok(maybe_zone) => Ok(maybe_zone.is_some()), + Err(err) => Err(CheckZoneExistsError::FindByName { + name: name.to_string(), + err, + }), + } + } + async fn halt_zone( &self, - zone: &RunningZone, + zone: &ZoneName<'_>, log: &Logger, ) -> Result<(), ZoneShutdownError> { - // We don't use `zone.stop()` here because it doesn't allow repeated - // attempts after a failure: - // https://github.com/oxidecomputer/omicron/issues/7881. Instead, use - // the lower-level `Zones::halt_and_remove_logged()` function directly. - // This may leave our `RunningZone` is a bogus state where it still - // holds a `zoneid_t` that doesn't exist anymore, but if we're in the - // shutdown path we never use that `zoneid_t`. + // We don't use `RunningZone::stop()` here because it doesn't allow + // repeated attempts after a failure + // (https://github.com/oxidecomputer/omicron/issues/7881) and because in + // the case of "an unexpected zone is running", all we have is the name. + // Instead, use the lower-level `Zones::halt_and_remove_logged()` + // function directly. Zones::real_api() - .halt_and_remove_logged(log, zone.name()) + .halt_and_remove_logged(log, &zone.0) .await .map_err(ZoneShutdownError::HaltAndRemove) } @@ -758,8 +955,9 @@ impl ZoneFacilities for RealZoneFacilities { #[cfg(test)] mod tests { use super::*; - use crate::CurrentlyManagedZpoolsReceiver; + use crate::dataset_serialization_task::DatasetEnsureError; use anyhow::anyhow; + use assert_matches::assert_matches; use camino_tempfile::Utf8TempDir; use illumos_utils::dladm::Etherstub; use illumos_utils::dladm::EtherstubVnic; @@ -771,7 +969,12 @@ mod tests { use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use omicron_common::address::SLED_PREFIX; + use omicron_common::disk::DatasetConfig; + use omicron_common::disk::DatasetKind; + use omicron_common::disk::DatasetName; + use omicron_common::disk::SharedDatasetConfig; use omicron_test_utils::dev; + use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeSet; use std::collections::VecDeque; @@ -833,11 +1036,17 @@ mod tests { #[derive(Debug, Default)] struct FakeZoneFacilitiesInner { + existing_zones: BTreeSet, halt_responses: Option>>, removed_gz_addresses: BTreeSet, } impl FakeZoneFacilities { + fn push_existing_zone(&self, name: String) { + let mut inner = self.inner.lock().unwrap(); + inner.existing_zones.insert(name); + } + fn push_halt_response(&self, response: Result<(), ZoneShutdownError>) { let mut inner = self.inner.lock().unwrap(); inner.halt_responses.get_or_insert_default().push_back(response); @@ -845,9 +1054,17 @@ mod tests { } impl ZoneFacilities for FakeZoneFacilities { + async fn zone_with_name_exists( + &self, + name: &ZoneName<'_>, + ) -> Result { + let inner = self.inner.lock().unwrap(); + Ok(inner.existing_zones.contains(&*name.0)) + } + async fn halt_zone( &self, - _zone: &RunningZone, + zone: &ZoneName<'_>, _log: &Logger, ) -> Result<(), ZoneShutdownError> { // If a test has called `push_halt_response`, respsect that; 
@@ -855,9 +1072,18 @@ mod tests { let mut inner = self.inner.lock().unwrap(); match inner.halt_responses.as_mut() { Some(resp) => { - resp.pop_front().expect("have a response for halt_zone()") + let resp = resp + .pop_front() + .expect("have a response for halt_zone()"); + if resp.is_ok() { + inner.existing_zones.remove(&*zone.0); + } + resp + } + None => { + inner.existing_zones.remove(&*zone.0); + Ok(()) } - None => Ok(()), } } @@ -908,9 +1134,7 @@ mod tests { async fn start_omicron_zone( &self, _zone_config: &OmicronZoneConfig, - _mount_config: &MountConfig, - _is_time_synchronized: bool, - _all_u2_pools: &[ZpoolName], + _zone_root_path: PathInPool, ) -> anyhow::Result { let mut inner = self.inner.lock().unwrap(); inner @@ -965,6 +1189,69 @@ mod tests { } } + #[derive(Default)] + struct DatasetsBuilder { + datasets: Vec<(DatasetConfig, Result<(), DatasetEnsureError>)>, + } + + impl DatasetsBuilder { + fn push_root( + &mut self, + zone: &OmicronZoneConfig, + result: Result<(), DatasetEnsureError>, + ) { + let Some(pool) = zone.filesystem_pool else { + return; + }; + self.datasets.push(( + DatasetConfig { + id: DatasetUuid::new_v4(), + name: DatasetName::new( + pool, + DatasetKind::TransientZone { name: zone.zone_name() }, + ), + inner: SharedDatasetConfig::default(), + }, + result, + )); + } + + fn push_durable( + &mut self, + zone: &OmicronZoneConfig, + result: Result<(), DatasetEnsureError>, + ) { + let Some(dataset) = zone.dataset_name() else { + return; + }; + self.datasets.push(( + DatasetConfig { + id: DatasetUuid::new_v4(), + name: dataset, + inner: SharedDatasetConfig::default(), + }, + result, + )); + } + + fn build(self) -> OmicronDatasets { + OmicronDatasets::with_datasets(self.datasets.into_iter()) + } + } + + // Helper to build an all-dependencies-met `OmicronDatasets` for the given + // zone config. + fn make_datasets<'a>( + zones: impl Iterator, + ) -> OmicronDatasets { + let mut builder = DatasetsBuilder::default(); + for zone in zones { + builder.push_root(zone, Ok(())); + builder.push_durable(zone, Ok(())); + } + builder.build() + } + #[tokio::test] async fn shutdown_retries_after_failed_halt() { let logctx = dev::test_setup_log("shutdown_retries_after_failed_halt"); @@ -1055,26 +1342,24 @@ mod tests { let fake_zone_id = OmicronZoneUuid::new_v4(); let desired_zones: IdMap<_> = [make_zone_config(fake_zone_id)].into_iter().collect(); - let currently_managed_zpools = - CurrentlyManagedZpoolsReceiver::fake_static( - desired_zones.iter().map(|z| z.filesystem_pool.unwrap()), - ) - .current(); + let datasets = make_datasets(desired_zones.iter()); // Configure our fake sled-agent to fail to start a zone. let sled_agent_facilities = FakeSledAgentFacilities::default(); sled_agent_facilities.push_start_response(Err(anyhow!("test-boom"))); + let zone_facilities = FakeZoneFacilities::default(); // Starting with no zones, we should try and fail to start the one zone // in `desired_zones`. let mut zones = OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip); zones - .start_zones_if_needed( + .start_zones_if_needed_impl( &desired_zones, &sled_agent_facilities, + &zone_facilities, true, - ¤tly_managed_zpools, + &datasets, &logctx.log, ) .await; @@ -1104,11 +1389,12 @@ mod tests { // Starting from the "zone failed to start" state, we should try again // to start the zone (and succeed this time). 
zones - .start_zones_if_needed( + .start_zones_if_needed_impl( &desired_zones, &sled_agent_facilities, + &zone_facilities, true, - ¤tly_managed_zpools, + &datasets, &logctx.log, ) .await; @@ -1128,6 +1414,168 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn start_zone_stops_preexisting_zones() { + let logctx = dev::test_setup_log("start_zone_stops_preexisting_zones"); + + // Construct a zone we want to start. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + let datasets = make_datasets(desired_zones.iter()); + + // Configure our fake zone facilities to report a zone with this name as + // already running. + let sled_agent_facilities = FakeSledAgentFacilities::default(); + let zone_facilities = FakeZoneFacilities::default(); + zone_facilities.push_existing_zone(fake_zone.zone_name()); + + let mut zones = + OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip); + + // Set up our fake sled-agent to return success once the old zone has + // been halted. + let fake_zone_builder = FakeZoneBuilder::new(); + sled_agent_facilities.push_start_response(Ok(fake_zone_builder + .make_running_zone("test", logctx.log.clone()) + .await)); + + // Start zones: this should halt the preexisting zone. + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + &datasets, + &logctx.log, + ) + .await; + + assert_eq!( + zone_facilities.inner.lock().unwrap().existing_zones, + BTreeSet::new() + ); + + assert_eq!(zones.zones.len(), 1); + let zone_should_be_running = + zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!( + zone_should_be_running.config, + *desired_zones.get(&fake_zone.id).unwrap() + ); + match &zone_should_be_running.state { + ZoneState::Running(_) => (), + other => panic!("unexpected zone state: {other:?}"), + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn start_zone_fails_if_time_not_synced_when_required() { + let logctx = dev::test_setup_log( + "start_zone_fails_if_time_not_synced_when_required", + ); + + // Construct a zone we want to start, of a type that requires time to be + // sync'd. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + assert!(fake_zone.zone_type.requires_timesync()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + let datasets = make_datasets(desired_zones.iter()); + + let zone_facilities = FakeZoneFacilities::default(); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + let mut zones = OmicronZones::new( + nonexistent_mount_config(), + TimeSyncConfig::Normal, + ); + + // Start zones: this should refuse to start the zone. + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + false, // is_time_synchronized + &datasets, + &logctx.log, + ) + .await; + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the expected error state. 
+ match &zone.state { + ZoneState::FailedToStart(err) => { + assert_matches!(err, ZoneStartError::TimeNotSynchronized); + } + other => panic!("unexpected zone state: {other:?}"), + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn start_zone_fails_if_halting_preexisting_zone_fails() { + let logctx = dev::test_setup_log( + "start_zone_fails_if_halting_preexisting_zone_fails", + ); + + // Construct a zone we want to start. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + let datasets = make_datasets(desired_zones.iter()); + + // Configure our fake zone facilities to report a zone with this name as + // already running, and configure halting this zone to fail. + let zone_facilities = FakeZoneFacilities::default(); + zone_facilities.push_existing_zone(fake_zone.zone_name()); + zone_facilities.push_halt_response(Err( + ZoneShutdownError::UntrackMetrics(anyhow!("boom")), + )); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + let mut zones = + OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip); + + // Start zones: this should try and fail to halt the preexisting zone. + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + &datasets, + &logctx.log, + ) + .await; + + assert_eq!( + zone_facilities.inner.lock().unwrap().existing_zones, + [fake_zone.zone_name()].into_iter().collect::<BTreeSet<_>>(), + ); + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the "partially shut down" state. + match &zone.state { + ZoneState::PartiallyShutDown { state, err } => { + assert_matches!(state, PartiallyShutDownState::FailedToStop(_)); + let err = InlineErrorChain::new(err).to_string(); + assert!(err.contains("boom"), "unexpected error: {err}"); + } + other => panic!("unexpected zone state: {other:?}"), + } + + logctx.cleanup_successful(); + } + #[tokio::test] async fn shutdown_dns_does_dns_specific_cleanup() { let logctx = @@ -1194,4 +1642,161 @@ mod tests { logctx.cleanup_successful(); } + + #[tokio::test] + async fn start_zone_fails_if_missing_root_dataset() { + let logctx = + dev::test_setup_log("start_zone_fails_if_missing_root_dataset"); + + // Construct a zone we want to start. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + + // datasets0: missing root dataset entirely + let datasets0 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_durable(zone, Ok(())); + } + builder.build() + }; + + // datasets1: root exists but failed to ensure + let datasets1 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_root( + zone, + Err(DatasetEnsureError::TestError("boom")), + ); + builder.push_durable(zone, Ok(())); + } + builder.build() + }; + + let zone_facilities = FakeZoneFacilities::default(); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + // Both dataset variations should fail the same way.
+ for datasets in [&datasets0, &datasets1] { + let mut zones = OmicronZones::new( + nonexistent_mount_config(), + TimeSyncConfig::Skip, + ); + + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + datasets, + &logctx.log, + ) + .await; + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the "failed to start" state. + match &zone.state { + ZoneState::FailedToStart(err) => { + assert_matches!( + err, + ZoneStartError::DatasetDependency( + ZoneDatasetDependencyError::TransientZoneDatasetNotAvailable(_) + ) + ); + } + other => panic!("unexpected zone state: {other:?}"), + } + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn start_zone_fails_if_missing_durable_dataset() { + let logctx = + dev::test_setup_log("start_zone_fails_if_missing_durable_dataset"); + + // Construct a zone we want to start, using a zone type that has a + // durable dataset. + let fake_zone = OmicronZoneConfig { + id: OmicronZoneUuid::new_v4(), + filesystem_pool: Some(ZpoolName::new_external(ZpoolUuid::new_v4())), + zone_type: OmicronZoneType::Crucible { + address: "[::1]:0".parse().unwrap(), + dataset: OmicronZoneDataset { + pool_name: ZpoolName::new_external(ZpoolUuid::new_v4()), + }, + }, + image_source: OmicronZoneImageSource::InstallDataset, + }; + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + + // datasets0: missing durable dataset entirely + let datasets0 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_root(zone, Ok(())); + } + builder.build() + }; + + // datasets1: durable exists but failed to ensure + let datasets1 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_root(zone, Ok(())); + builder.push_durable( + zone, + Err(DatasetEnsureError::TestError("boom")), + ); + } + builder.build() + }; + + let zone_facilities = FakeZoneFacilities::default(); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + // Both dataset variations should fail the same way. + for datasets in [&datasets0, &datasets1] { + let mut zones = OmicronZones::new( + nonexistent_mount_config(), + TimeSyncConfig::Skip, + ); + + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + datasets, + &logctx.log, + ) + .await; + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the "failed to start" state.
+ match &zone.state { + ZoneState::FailedToStart(err) => { + assert_matches!( + err, + ZoneStartError::DatasetDependency( + ZoneDatasetDependencyError::DurableDatasetNotAvailable(_) + ) + ); + } + other => panic!("unexpected zone state: {other:?}"), + } + } + + logctx.cleanup_successful(); + } } diff --git a/sled-agent/config-reconciler/src/sled_agent_facilities.rs b/sled-agent/config-reconciler/src/sled_agent_facilities.rs index 98f3a164cc2..8445b51d21b 100644 --- a/sled-agent/config-reconciler/src/sled_agent_facilities.rs +++ b/sled-agent/config-reconciler/src/sled_agent_facilities.rs @@ -7,12 +7,11 @@ use illumos_utils::dladm::EtherstubVnic; use illumos_utils::running_zone::RunningZone; -use illumos_utils::zpool::ZpoolName; +use illumos_utils::zpool::PathInPool; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use sled_agent_types::zone_bundle::ZoneBundleCause; -use sled_storage::config::MountConfig; use std::future::Future; use tufaceous_artifact::ArtifactHash; @@ -38,9 +37,7 @@ pub trait SledAgentFacilities: Send + Sync + 'static { fn start_omicron_zone( &self, zone_config: &OmicronZoneConfig, - mount_config: &MountConfig, - is_time_synchronized: bool, - all_u2_pools: &[ZpoolName], + zone_root_path: PathInPool, ) -> impl Future> + Send; /// Stop tracking metrics for a zone's datalinks. diff --git a/sled-agent/src/artifact_store.rs b/sled-agent/src/artifact_store.rs index c2b6965a32a..e73461020d5 100644 --- a/sled-agent/src/artifact_store.rs +++ b/sled-agent/src/artifact_store.rs @@ -17,11 +17,11 @@ //! Operations that list or modify artifacts or the configuration are called by //! Nexus and handled by the Sled Agent API. -use std::collections::BTreeMap; use std::future::Future; use std::io::{ErrorKind, Write}; use std::net::SocketAddrV6; use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use atomicwrites::{AtomicFile, OverwriteBehavior}; @@ -40,8 +40,8 @@ use sha2::{Digest, Sha256}; use sled_agent_api::{ ArtifactConfig, ArtifactListResponse, ArtifactPutResponse, }; -use sled_storage::dataset::M2_ARTIFACT_DATASET; -use sled_storage::manager::StorageHandle; +use sled_agent_config_reconciler::ConfigReconcilerHandle; +use sled_agent_config_reconciler::InternalDisksReceiver; use slog::{Logger, error, info}; use slog_error_chain::{InlineErrorChain, SlogInlineError}; use tokio::fs::File; @@ -49,8 +49,6 @@ use tokio::sync::{OwnedSemaphorePermit, mpsc, oneshot, watch}; use tokio::task::JoinSet; use tufaceous_artifact::ArtifactHash; -use crate::services::ServiceManager; - // These paths are defined under the artifact storage dataset. They // cannot conflict with any artifact paths because all artifact paths are // hexadecimal-encoded SHA-256 checksums. 
@@ -93,7 +91,7 @@ impl ArtifactStore { pub(crate) async fn new( log: &Logger, storage: T, - services: Option, + config_reconciler: Option>, ) -> ArtifactStore { let log = log.new(slog::o!("component" => "ArtifactStore")); @@ -133,7 +131,7 @@ impl ArtifactStore { tokio::task::spawn(ledger_manager( log.clone(), ledger_paths, - services, + config_reconciler, ledger_rx, config_tx, )); @@ -164,13 +162,15 @@ impl ArtifactStore { } } -impl ArtifactStore { +impl ArtifactStore { pub(crate) async fn start( - self, + self: Arc, sled_address: SocketAddrV6, dropshot_config: &ConfigDropshot, - ) -> Result>, StartError> - { + ) -> Result< + dropshot::HttpServer>>, + StartError, + > { let mut depot_address = sled_address; depot_address.set_port(REPO_DEPOT_PORT); @@ -259,7 +259,7 @@ impl ArtifactStore { /// Open an artifact file by hash from a storage handle. /// /// This is the same as [ArtifactStore::get], but can be called with only - /// a [StorageHandle]. + /// a storage implementation. pub(crate) async fn get_from_storage( storage: &T, log: &Logger, @@ -452,11 +452,11 @@ type LedgerManagerRequest = async fn ledger_manager( log: Logger, ledger_paths: Vec, - services: Option, + config_reconciler: Option>, mut rx: mpsc::Receiver, config_channel: watch::Sender>, ) { - let services = services.as_ref(); + let config_reconciler = config_reconciler.as_ref(); let handle_request = async |new_config: ArtifactConfig| { if ledger_paths.is_empty() { return Err(Error::NoUpdateDataset); @@ -466,21 +466,11 @@ async fn ledger_manager( { if new_config.generation > ledger.data().generation { // New config generation. First check that the configuration - // contains all artifacts that are presently in use. - let mut missing = BTreeMap::new(); - // Check artifacts from the current zone configuration. - if let Some(services) = services { - for zone in services.omicron_zones_list().await.zones { - if let Some(hash) = zone.image_source.artifact_hash() { - if !new_config.artifacts.contains(&hash) { - missing - .insert(hash, "current zone configuration"); - } - } - } - } - if !missing.is_empty() { - return Err(Error::InUseArtifactsMissing(missing)); + // is valid against the current ledgered sled config. + if let Some(config_reconciler) = config_reconciler { + config_reconciler + .validate_artifact_config(new_config.clone()) + .await??; } // Everything looks okay; update the ledger. @@ -614,14 +604,11 @@ pub trait DatasetsManager: Clone + Send + Sync + 'static { } } -impl DatasetsManager for StorageHandle { +impl DatasetsManager for InternalDisksReceiver { async fn artifact_storage_paths( &self, ) -> impl Iterator + '_ { - self.get_latest_disks() - .await - .all_m2_mountpoints(M2_ARTIFACT_DATASET) - .into_iter() + self.current().all_artifact_datasets().collect::>().into_iter() } } @@ -766,11 +753,11 @@ impl ArtifactWriter { } /// Implementation of the Repo Depot API backed by an -/// `ArtifactStore`. +/// `ArtifactStore`. 
enum RepoDepotImpl {} impl RepoDepotApi for RepoDepotImpl { - type Context = ArtifactStore; + type Context = Arc>; async fn artifact_get_by_sha256( rqctx: RequestContext, @@ -831,8 +818,15 @@ pub enum Error { #[error("Digest mismatch: expected {expected}, actual {actual}")] HashMismatch { expected: ArtifactHash, actual: ArtifactHash }, - #[error("Artifacts in use are not present in new config: {0:?}")] - InUseArtifactsMissing(BTreeMap), + #[error("New config is invalid per current sled config")] + InvalidPerSledConfig( + #[from] sled_agent_config_reconciler::LedgerArtifactConfigError, + ), + + #[error("Cannot validate incoming config against sled config")] + CannotValidateAgainstSledConfig( + #[from] sled_agent_config_reconciler::LedgerTaskError, + ), #[error("Blocking task failed")] Join(#[source] tokio::task::JoinError), @@ -863,7 +857,7 @@ impl From for HttpError { match err { // 4xx errors Error::HashMismatch { .. } - | Error::InUseArtifactsMissing { .. } + | Error::InvalidPerSledConfig { .. } | Error::NoConfig | Error::NotInConfig { .. } => { HttpError::for_bad_request(None, err.to_string()) @@ -894,9 +888,12 @@ impl From for HttpError { | Error::File { .. } | Error::Join(_) | Error::LedgerCommit(_) - | Error::LedgerChannel => HttpError::for_internal_error( - InlineErrorChain::new(&err).to_string(), - ), + | Error::LedgerChannel + | Error::CannotValidateAgainstSledConfig(_) => { + HttpError::for_internal_error( + InlineErrorChain::new(&err).to_string(), + ) + } } } } diff --git a/sled-agent/src/bootstrap/bootstore_setup.rs b/sled-agent/src/bootstrap/bootstore_setup.rs index 1b148290104..c9e3cefbd8e 100644 --- a/sled-agent/src/bootstrap/bootstore_setup.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -15,7 +15,6 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CLUSTER_DATASET; -use sled_storage::resources::AllDisks; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; @@ -26,7 +25,7 @@ const BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; pub fn new_bootstore_config( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], baseboard: Baseboard, global_zone_bootstrap_ip: Ipv6Addr, ) -> Result { @@ -37,19 +36,20 @@ pub fn new_bootstore_config( learn_timeout: Duration::from_secs(5), rack_init_timeout: Duration::from_secs(300), rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths(&all_disks)?, + fsm_state_ledger_paths: bootstore_fsm_state_paths( + cluster_dataset_paths, + )?, network_config_ledger_paths: bootstore_network_config_paths( - &all_disks, + cluster_dataset_paths, )?, }) } fn bootstore_fsm_state_paths( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], ) -> Result, StartError> { - let paths: Vec<_> = all_disks - .all_m2_mountpoints(CLUSTER_DATASET) - .into_iter() + let paths: Vec<_> = cluster_dataset_paths + .iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) .collect(); @@ -60,11 +60,10 @@ fn bootstore_fsm_state_paths( } fn bootstore_network_config_paths( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], ) -> Result, StartError> { - let paths: Vec<_> = all_disks - .all_m2_mountpoints(CLUSTER_DATASET) - .into_iter() + let paths: Vec<_> = cluster_dataset_paths + .iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) .collect(); diff --git 
a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index 44bd09387ff..12a986cf2fb 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -23,10 +23,10 @@ use dropshot::{ use omicron_common::api::external::Error; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::RackOperationStatus; use sled_hardware_types::Baseboard; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; @@ -37,7 +37,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_manager: StorageHandle, + pub(crate) internal_disks_rx: InternalDisksReceiver, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -56,7 +56,7 @@ impl BootstrapServerContext { &self.base_log, self.sprockets.clone(), self.global_zone_bootstrap_ip, - &self.storage_manager, + &self.internal_disks_rx, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index d5b7553d54b..283569ade96 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -20,7 +20,6 @@ use crate::long_running_tasks::{ LongRunningTaskHandles, spawn_all_longrunning_tasks, }; use crate::services::ServiceManager; -use crate::services::TimeSyncConfig; use crate::sled_agent::SledAgent; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; @@ -36,6 +35,7 @@ use illumos_utils::zone::Api; use illumos_utils::zone::Zones; use omicron_common::FileKv; use omicron_common::address::Ipv6Subnet; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_hardware::DendriteAsic; use sled_hardware::SledMode; use sled_hardware::underlay; @@ -54,6 +54,7 @@ pub(super) struct BootstrapAgentStartup { pub(super) service_manager: ServiceManager, pub(super) long_running_task_handles: LongRunningTaskHandles, pub(super) sled_agent_started_tx: oneshot::Sender, + pub(super) config_reconciler_spawn_token: ConfigReconcilerSpawnToken, } impl BootstrapAgentStartup { @@ -121,6 +122,7 @@ impl BootstrapAgentStartup { // the process and are used by both the bootstrap agent and sled agent let ( long_running_task_handles, + config_reconciler_spawn_token, sled_agent_started_tx, service_manager_ready_tx, ) = spawn_all_longrunning_tasks( @@ -134,23 +136,18 @@ impl BootstrapAgentStartup { let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; - let time_sync = if let Some(true) = config.skip_timesync { - TimeSyncConfig::Skip - } else { - TimeSyncConfig::Normal - }; - let service_manager = ServiceManager::new( &base_log, ddm_reconciler, startup_networking, sled_mode, - time_sync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), - long_running_task_handles.storage_manager.clone(), - long_running_task_handles.zone_bundler.clone(), long_running_task_handles.zone_image_resolver.clone(), + long_running_task_handles + .config_reconciler + .internal_disks_rx() + .clone(), ); // Inform the hardware monitor that the service manager is ready @@ -168,6 +165,7 @@ impl BootstrapAgentStartup { service_manager, 
long_running_task_handles, sled_agent_started_tx, + config_reconciler_spawn_token, }) } } diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index 2be59fd5880..db2e79d3ec9 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -9,9 +9,9 @@ use crate::rack_setup::service::SetupServiceError; use bootstore::schemes::v0 as bootstore; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::{RackOperationStatus, RssStep}; -use sled_storage::manager::StorageHandle; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::mem; @@ -146,7 +146,7 @@ impl RssAccess { parent_log: &Logger, sprockets: SprocketsConfig, global_zone_bootstrap_ip: Ipv6Addr, - storage_manager: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result { @@ -182,7 +182,7 @@ impl RssAccess { *status = RssStatus::Initializing { id, completion, step_rx }; mem::drop(status); let parent_log = parent_log.clone(); - let storage_manager = storage_manager.clone(); + let internal_disks_rx = internal_disks_rx.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { @@ -190,7 +190,7 @@ impl RssAccess { &parent_log, sprockets, global_zone_bootstrap_ip, - storage_manager, + internal_disks_rx, bootstore_node_handle, request, step_tx, @@ -328,7 +328,7 @@ async fn rack_initialize( parent_log: &Logger, sprockets: SprocketsConfig, global_zone_bootstrap_ip: Ipv6Addr, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, step_tx: watch::Sender, @@ -338,7 +338,7 @@ async fn rack_initialize( sprockets, request, global_zone_bootstrap_ip, - storage_manager, + internal_disks_rx, bootstore_node_handle, step_tx, ) diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index efddfa2aa25..f3872b90feb 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -14,10 +14,10 @@ use futures::stream::FuturesUnordered; use omicron_common::backoff::BackoffError; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::RssStep; use sled_agent_types::sled::StartSledAgentRequest; -use sled_storage::manager::StorageHandle; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::net::Ipv6Addr; @@ -50,7 +50,7 @@ impl RssHandle { sprockets: SprocketsConfig, config: RackInitializeRequest, our_bootstrap_address: Ipv6Addr, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, ) -> Result<(), SetupServiceError> { @@ -59,7 +59,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_manager, + internal_disks_rx, tx, bootstore, step_tx, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 288f35d2a33..d18b42a3466 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -39,11 +39,12 @@ 
use omicron_ddm_admin_client::DdmError; use omicron_ddm_admin_client::types::EnableStatsRequest; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::RackInitUuid; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::sled::StartSledAgentRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::io; use std::net::SocketAddr; @@ -180,12 +181,15 @@ impl Server { service_manager, long_running_task_handles, sled_agent_started_tx, + config_reconciler_spawn_token, } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? - let paths = - sled_config_paths(&long_running_task_handles.storage_manager) - .await?; + let internal_disks_rx = long_running_task_handles + .config_reconciler + .internal_disks_rx() + .clone(); + let paths = sled_config_paths(&internal_disks_rx).await?; let maybe_ledger = Ledger::::new(&startup_log, paths).await; @@ -204,7 +208,7 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_manager: long_running_task_handles.storage_manager.clone(), + internal_disks_rx, bootstore_node_handle: long_running_task_handles.bootstore.clone(), baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, @@ -244,6 +248,7 @@ impl Server { &config, start_sled_agent_request, long_running_task_handles.clone(), + config_reconciler_spawn_token, service_manager.clone(), &base_log, &startup_log, @@ -257,14 +262,12 @@ impl Server { .map_err(|_| ()) .expect("Failed to send to StorageMonitor"); - // For cold boot specifically, we now need to load the services - // we're responsible for, while continuing to handle hardware - // notifications. This cannot fail: we retry indefinitely until - // we're done loading services. - sled_agent.load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) + SledAgentState::Bootstrapping(Some(BootstrappingDependencies { + sled_agent_started_tx, + config_reconciler_spawn_token, + })) }; // Spawn our inner task that handles any future hardware updates and any @@ -306,11 +309,16 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping(Option>), + Bootstrapping(Option), // ... or the sled agent server is running. ServerStarted(SledAgentServer), } +struct BootstrappingDependencies { + sled_agent_started_tx: oneshot::Sender, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, +} + #[derive(thiserror::Error, Debug)] pub enum SledAgentServerStartError { #[error("Failed to start sled-agent server: {0}")] @@ -350,6 +358,7 @@ async fn start_sled_agent( config: &SledConfig, request: StartSledAgentRequest, long_running_task_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, service_manager: ServiceManager, base_log: &Logger, log: &Logger, @@ -386,9 +395,6 @@ async fn start_sled_agent( } } - // Inform the storage service that the key manager is available - long_running_task_handles.storage_manager.key_manager_ready().await; - // Inform our DDM reconciler of our underlay subnet and the information it // needs for maghemite to enable Oximeter stats. 
let ddm_reconciler = service_manager.ddm_reconciler(); @@ -404,6 +410,7 @@ async fn start_sled_agent( base_log.clone(), request.clone(), long_running_task_handles.clone(), + config_reconciler_spawn_token, service_manager, ) .await @@ -413,8 +420,10 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = - sled_config_paths(&long_running_task_handles.storage_manager).await?; + let paths = sled_config_paths( + long_running_task_handles.config_reconciler.internal_disks_rx(), + ) + .await?; let mut ledger = Ledger::new_with(&log, paths, request); ledger.commit().await?; @@ -468,12 +477,11 @@ impl From for SledAgentServerStartError { } async fn sled_config_paths( - storage: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, ) -> Result, MissingM2Paths> { - let resources = storage.get_latest_disks().await; - let paths: Vec<_> = resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let paths: Vec<_> = internal_disks_rx + .current() + .all_config_datasets() .map(|p| p.join(SLED_AGENT_REQUEST_FILE)) .collect(); @@ -536,7 +544,7 @@ impl Inner { log: &Logger, ) { match &mut self.state { - SledAgentState::Bootstrapping(sled_agent_started_tx) => { + SledAgentState::Bootstrapping(deps) => { let request_id = request.body.id.into_untyped_uuid(); // Extract from options to satisfy the borrow checker. @@ -545,13 +553,16 @@ impl Inner { // we explicitly unwrap here, and panic on error below. // // See https://github.com/oxidecomputer/omicron/issues/4494 - let sled_agent_started_tx = - sled_agent_started_tx.take().unwrap(); + let BootstrappingDependencies { + sled_agent_started_tx, + config_reconciler_spawn_token, + } = deps.take().unwrap(); let response = match start_sled_agent( &self.config, request, self.long_running_task_handles.clone(), + config_reconciler_spawn_token, self.service_manager.clone(), &self.base_log, &log, @@ -620,15 +631,13 @@ impl Inner { } async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { - let config_dirs = self + let internal_disks = self .long_running_task_handles - .storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter(); + .config_reconciler + .internal_disks_rx() + .current(); - for dir in config_dirs { + for dir in internal_disks.all_config_datasets() { for entry in dir.read_dir_utf8().map_err(|err| { BootstrapError::Io { message: format!("Deleting {dir}"), err } })? { diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 9508a11bfba..0e8cd00463d 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -8,10 +8,10 @@ use crate::services::ServiceManager; use crate::sled_agent::SledAgent; +use sled_agent_config_reconciler::RawDisksSender; use sled_hardware::{HardwareManager, HardwareUpdate}; use sled_hardware_types::Baseboard; use sled_storage::disk::RawDisk; -use sled_storage::manager::StorageHandle; use slog::Logger; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, oneshot}; @@ -68,8 +68,8 @@ pub struct HardwareMonitor { // A reference to the hardware manager hardware_manager: HardwareManager, - // A handle to [`sled_hardware::manager::StorageManger`] - storage_manager: StorageHandle, + // A handle to send raw disk updates to the config-reconciler system. 
+ raw_disks_tx: RawDisksSender, // A handle to the sled-agent // @@ -91,7 +91,7 @@ impl HardwareMonitor { pub fn new( log: &Logger, hardware_manager: &HardwareManager, - storage_manager: &StorageHandle, + raw_disks_tx: RawDisksSender, ) -> ( HardwareMonitor, oneshot::Sender, @@ -112,7 +112,7 @@ impl HardwareMonitor { service_manager_ready_rx, hardware_rx, hardware_manager: hardware_manager.clone(), - storage_manager: storage_manager.clone(), + raw_disks_tx, sled_agent: None, tofino_manager, }, @@ -177,36 +177,16 @@ impl HardwareMonitor { } } HardwareUpdate::DiskAdded(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - // - // Here and below, we're "dropping a future" rather than - // awaiting it. That's intentional - the hardware monitor - // doesn't care when this work is finished, just when it's - // enqueued. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk(disk.into()) - .await; + self.raw_disks_tx + .add_or_update_raw_disk(disk.into(), &self.log); } HardwareUpdate::DiskRemoved(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk_removal(disk.into()) - .await; + self.raw_disks_tx + .remove_raw_disk(disk.identity(), &self.log); } HardwareUpdate::DiskUpdated(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk_update(disk.into()) - .await; + self.raw_disks_tx + .add_or_update_raw_disk(disk.into(), &self.log); } }, Err(broadcast::error::RecvError::Lagged(count)) => { @@ -280,14 +260,9 @@ impl HardwareMonitor { self.deactivate_switch().await; } - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. 
- #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .ensure_using_exactly_these_disks( - self.hardware_manager.disks().into_values().map(RawDisk::from), - ) - .await; + self.raw_disks_tx.set_raw_disks( + self.hardware_manager.disks().into_values().map(RawDisk::from), + &self.log, + ); } } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index bdb64ac2376..74354fa7f38 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -18,7 +18,7 @@ use dropshot::{ Query, RequestContext, StreamingBody, TypedBody, }; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, OmicronSledConfigResult, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{DiskRuntimeState, SledVmmState}; @@ -26,9 +26,6 @@ use omicron_common::api::internal::shared::{ ExternalIpGatewayMap, ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }; -use omicron_common::disk::{ - DatasetsConfig, DiskVariant, M2Slot, OmicronPhysicalDisksConfig, -}; use range_requests::PotentialRange; use sled_agent_api::*; use sled_agent_types::boot_disk::{ @@ -424,13 +421,6 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseDeleted()) } - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list().await?)) - } - async fn zone_bundle_cleanup( rqctx: RequestContext, ) -> Result>, HttpError> @@ -452,27 +442,11 @@ impl SledAgentApi for SledAgentImpl { async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - sa.set_omicron_config(body_args) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) - } - - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) - } - - async fn zpools_get( - rqctx: RequestContext, - ) -> Result>, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await)) + sa.set_omicron_config(body_args).await??; + Ok(HttpResponseUpdatedNoContent()) } async fn sled_role_get( @@ -816,29 +790,7 @@ impl SledAgentApi for SledAgentImpl { let boot_disk = path_params.into_inner().boot_disk; // Find our corresponding disk. - let maybe_disk_path = - sa.storage().get_latest_disks().await.iter_managed().find_map( - |(_identity, disk)| { - // Synthetic disks panic if asked for their `slot()`, so filter - // them out first; additionally, filter out any non-M2 disks. - if disk.is_synthetic() || disk.variant() != DiskVariant::M2 - { - return None; - } - - // Convert this M2 disk's slot to an M2Slot, and skip any that - // don't match the requested boot_disk. 
- let Ok(slot) = M2Slot::try_from(disk.slot()) else { - return None; - }; - if slot != boot_disk { - return None; - } - - let raw_devs_path = true; - Some(disk.boot_image_devfs_path(raw_devs_path)) - }, - ); + let maybe_disk_path = sa.boot_image_raw_devfs_path(boot_disk); let disk_path = match maybe_disk_path { Some(Ok(path)) => path, diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 5481672b17d..573fc717811 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -36,10 +36,9 @@ use propolis_client::Client as PropolisClient; use propolis_client::instance_spec::{ComponentV0, SpecKey}; use rand::SeedableRng; use rand::prelude::IteratorRandom; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; use sled_agent_types::instance::*; use sled_agent_types::zone_bundle::ZoneBundleCause; -use sled_storage::dataset::ZONE_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::SocketAddr; @@ -530,8 +529,8 @@ struct InstanceRunner { // Connection to Nexus nexus_client: NexusClient, - // Storage resources - storage: StorageHandle, + // Available datasets for choosing zone roots + available_datasets_rx: AvailableDatasetsReceiver, // Used to create propolis zones zone_builder_factory: ZoneBuilderFactory, @@ -1556,10 +1555,10 @@ impl Instance { nexus_client, vnic_allocator, port_manager, - storage, zone_bundler, zone_builder_factory, metrics_queue, + available_datasets_rx, } = services; let mut dhcp_config = DhcpCfg { @@ -1645,7 +1644,7 @@ impl Instance { state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, - storage, + available_datasets_rx, zone_builder_factory, zone_bundler, metrics_queue, @@ -1991,13 +1990,10 @@ impl InstanceRunner { // configured VNICs. 
let zname = propolis_zone_name(&self.propolis_id); let mut rng = rand::rngs::StdRng::from_entropy(); - let latest_disks = self - .storage - .get_latest_disks() - .await - .all_u2_mountpoints(ZONE_DATASET); - let root = latest_disks + let root = self + .available_datasets_rx + .all_mounted_zone_root_datasets() .into_iter() .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)?; @@ -2275,11 +2271,16 @@ mod tests { use omicron_common::api::external::{Generation, Hostname}; use omicron_common::api::internal::nexus::VmmState; use omicron_common::api::internal::shared::{DhcpConfig, SledIdentifiers}; + use omicron_common::disk::DiskIdentity; + use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::{ InstanceMigrateStatusResponse, InstanceStateMonitorResponse, }; + use sled_agent_config_reconciler::{ + CurrentlyManagedZpoolsReceiver, InternalDisksReceiver, + }; use sled_agent_types::zone_bundle::CleanupContext; - use sled_storage::manager_test_harness::StorageManagerTestHarness; + use sled_storage::config::MountConfig; use std::net::SocketAddrV6; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV4}; use std::str::FromStr; @@ -2409,25 +2410,11 @@ mod tests { .expect("single-stepping mock server failed unexpectedly"); } - async fn setup_storage_manager(log: &Logger) -> StorageManagerTestHarness { - let mut harness = StorageManagerTestHarness::new(log).await; - let raw_disks = - harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; - harness.handle().key_manager_ready().await; - let config = harness.make_config(1, &raw_disks); - let _ = harness - .handle() - .omicron_physical_disks_ensure(config.clone()) - .await - .expect("Ensuring disks should work after key manager is ready"); - harness - } - async fn instance_struct( log: &Logger, propolis_addr: SocketAddr, nexus_client: NexusClient, - storage_handle: StorageHandle, + available_datasets_rx: AvailableDatasetsReceiver, temp_dir: &str, ) -> (Instance, MetricsRx) { let id = InstanceUuid::new_v4(); @@ -2439,7 +2426,7 @@ mod tests { let (services, rx) = fake_instance_manager_services( log, - storage_handle, + available_datasets_rx, nexus_client, temp_dir, ) @@ -2523,7 +2510,7 @@ mod tests { async fn fake_instance_manager_services( log: &Logger, - storage_handle: StorageHandle, + available_datasets_rx: AvailableDatasetsReceiver, nexus_client: NexusClient, temp_dir: &str, ) -> (InstanceManagerServices, MetricsRx) { @@ -2540,7 +2527,19 @@ mod tests { let cleanup_context = CleanupContext::default(); let zone_bundler = ZoneBundler::new( log.new(o!("component" => "ZoneBundler")), - storage_handle.clone(), + InternalDisksReceiver::fake_static( + Arc::new(MountConfig::default()), + [( + DiskIdentity { + vendor: "test-vendor".to_string(), + model: "test-model".to_string(), + serial: "test-serial".to_string(), + }, + ZpoolName::new_external(ZpoolUuid::new_v4()), + )] + .into_iter(), + ), + available_datasets_rx.clone(), cleanup_context, ) .await; @@ -2550,7 +2549,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage: storage_handle, + available_datasets_rx, zone_bundler, zone_builder_factory: ZoneBuilderFactory::fake( Some(temp_dir), @@ -2565,7 +2564,6 @@ mod tests { /// interactions with other parts of the system (e.g. Nexus and metrics). 
#[allow(dead_code)] struct InstanceTestObjects { - storage_harness: StorageManagerTestHarness, nexus: FakeNexusParts, _temp_guard: Utf8TempDir, instance_manager: crate::instance_manager::InstanceManager, @@ -2574,12 +2572,13 @@ mod tests { impl InstanceTestObjects { async fn new(log: &slog::Logger) -> Self { - let storage_harness = setup_storage_manager(log).await; let nexus = FakeNexusParts::new(&log).await; let temp_guard = Utf8TempDir::new().unwrap(); let (services, metrics_rx) = fake_instance_manager_services( log, - storage_harness.handle().clone(), + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), nexus.nexus_client.clone(), temp_guard.path().as_str(), ) @@ -2589,7 +2588,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -2603,7 +2602,10 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + CurrentlyManagedZpoolsReceiver::fake_static( + std::iter::empty(), + ), + available_datasets_rx, zone_bundler, zone_builder_factory, vmm_reservoir_manager, @@ -2612,17 +2614,12 @@ mod tests { .unwrap(); Self { - storage_harness, nexus, _temp_guard: temp_guard, instance_manager, metrics_rx, } } - - async fn cleanup(mut self) { - self.storage_harness.cleanup().await; - } } #[tokio::test] @@ -2642,9 +2639,6 @@ mod tests { _nexus_server, } = FakeNexusParts::new(&log).await; - let mut storage_harness = setup_storage_manager(&log).await; - let storage_handle = storage_harness.handle().clone(); - let temp_guard = Utf8TempDir::new().unwrap(); let (inst, mut metrics_rx) = timeout( @@ -2653,7 +2647,9 @@ mod tests { &log, propolis_addr, nexus_client, - storage_handle, + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), temp_guard.path().as_str(), ), ) @@ -2708,7 +2704,6 @@ mod tests { .try_recv() .expect_err("The metrics request queue should have one message"); - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2727,9 +2722,6 @@ mod tests { _nexus_server, } = FakeNexusParts::new(&log).await; - let mut storage_harness = setup_storage_manager(&logctx.log).await; - let storage_handle = storage_harness.handle().clone(); - let temp_guard = Utf8TempDir::new().unwrap(); let (inst, _) = timeout( @@ -2739,7 +2731,9 @@ mod tests { // we want to test propolis not ever coming up SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 1, 0, 0)), nexus_client, - storage_handle, + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), temp_guard.path().as_str(), ), ) @@ -2775,7 +2769,6 @@ mod tests { ); } - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2874,7 +2867,6 @@ mod tests { .try_recv() .expect_err("The metrics request queue should have one message"); - test_objects.cleanup().await; logctx.cleanup_successful(); } @@ -3020,7 +3012,6 @@ mod tests { .expect("timed out waiting for VmmState::Stopped in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); - test_objects.cleanup().await; logctx.cleanup_successful(); } @@ -3067,7 +3058,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -3102,7 +3093,7 @@ mod tests { state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, - storage, + available_datasets_rx, zone_builder_factory, zone_bundler, metrics_queue, @@ -3125,12 +3116,10 @@ mod tests { // them directly. 
_nexus_server: HttpServer, _dns_server: TransientServer, - storage_harness: StorageManagerTestHarness, } impl TestInstanceRunner { async fn new(log: &slog::Logger) -> Self { - let storage_harness = setup_storage_manager(&log).await; let FakeNexusParts { nexus_client, _nexus_server, @@ -3141,7 +3130,9 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let (services, _metrics_rx) = fake_instance_manager_services( &log, - storage_harness.handle().clone(), + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), nexus_client, temp_guard.path().as_str(), ) @@ -3183,7 +3174,6 @@ mod tests { remove_rx, _nexus_server, _dns_server, - storage_harness, } } } @@ -3204,7 +3194,6 @@ mod tests { mut remove_rx, _nexus_server, _dns_server, - mut storage_harness, } = TestInstanceRunner::new(&log).await; let (resp_tx, resp_rx) = oneshot::channel(); @@ -3258,7 +3247,6 @@ mod tests { drop(terminate_tx); let _ = runner_task.await; - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -3278,7 +3266,6 @@ mod tests { mut remove_rx, _nexus_server, _dns_server, - mut storage_harness, } = TestInstanceRunner::new(&log).await; let (resp_tx, resp_rx) = oneshot::channel(); @@ -3301,7 +3288,6 @@ mod tests { }; assert_eq!(state.vmm_state.state, VmmState::Failed); - storage_harness.cleanup().await; logctx.cleanup_successful(); } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 9bd0ad76497..fa8a11c89d8 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -17,15 +17,14 @@ use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; use illumos_utils::running_zone::ZoneBuilderFactory; use omicron_common::api::external::ByteCount; -use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::PropolisUuid; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; +use sled_agent_config_reconciler::CurrentlyManagedZpoolsReceiver; use sled_agent_types::instance::*; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; use slog::Logger; -use std::collections::{BTreeMap, HashSet}; +use std::collections::BTreeMap; use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; @@ -66,10 +65,10 @@ pub(crate) struct InstanceManagerServices { pub nexus_client: NexusClient, pub vnic_allocator: VnicAllocator, pub port_manager: PortManager, - pub storage: StorageHandle, pub zone_bundler: ZoneBundler, pub zone_builder_factory: ZoneBuilderFactory, pub metrics_queue: MetricsRequestQueue, + pub available_datasets_rx: AvailableDatasetsReceiver, } // Describes the internals of the "InstanceManager", though most of the @@ -95,7 +94,8 @@ impl InstanceManager { nexus_client: NexusClient, vnic_allocator: VnicAllocator, port_manager: PortManager, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, vmm_reservoir_manager: VmmReservoirManagerHandle, metrics_queue: MetricsRequestQueue, @@ -105,7 +105,8 @@ impl InstanceManager { nexus_client, vnic_allocator, port_manager, - storage, + currently_managed_zpools_rx, + available_datasets_rx, zone_bundler, ZoneBuilderFactory::new(), vmm_reservoir_manager, @@ -123,7 +124,8 @@ impl InstanceManager { nexus_client: NexusClient, vnic_allocator: VnicAllocator, port_manager: PortManager, 
- storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, vmm_reservoir_manager: VmmReservoirManagerHandle, @@ -142,8 +144,8 @@ impl InstanceManager { jobs: BTreeMap::new(), vnic_allocator, port_manager, - storage_generation: None, - storage, + currently_managed_zpools_rx, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -315,23 +317,6 @@ impl InstanceManager { .map_err(|_| Error::FailedSendInstanceManagerClosed)?; rx.await? } - - /// Marks instances failed unless they're using storage from `disks`. - /// - /// This function looks for transient zone filesystem usage on expunged - /// zpools. - pub async fn use_only_these_disks( - &self, - disks: AllDisks, - ) -> Result<(), Error> { - let (tx, rx) = oneshot::channel(); - self.inner - .tx - .send(InstanceManagerRequest::OnlyUseDisks { disks, tx }) - .await - .map_err(|_| Error::FailedSendInstanceManagerClosed)?; - rx.await? - } } // Most requests that can be sent to the "InstanceManagerRunner" task. @@ -386,10 +371,6 @@ enum InstanceManagerRequest { propolis_id: PropolisUuid, tx: oneshot::Sender>, }, - OnlyUseDisks { - disks: AllDisks, - tx: oneshot::Sender>, - }, } // Requests that the instance manager stop processing information about a @@ -426,8 +407,8 @@ struct InstanceManagerRunner { vnic_allocator: VnicAllocator, port_manager: PortManager, - storage_generation: Option, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, metrics_queue: MetricsRequestQueue, @@ -458,6 +439,24 @@ impl InstanceManagerRunner { }, } }, + // If the set of currently-managed zpools has changed, shut down + // any instances due to disks that have disappeared out from + // under them. + result = self.currently_managed_zpools_rx.changed() => { + match result { + Ok(()) => { + self.use_only_currently_managed_zpools().await; + } + Err(_) => { + warn!( + self.log, + "InstanceManager's 'current zpools' channel \ + closed; shutting down", + ); + break; + } + } + } request = self.rx.recv() => { let request_variant = request.as_ref().map(|r| r.to_string()); let result = match request { @@ -495,10 +494,6 @@ impl InstanceManagerRunner { // the state... self.get_instance_state(tx, propolis_id) }, - Some(OnlyUseDisks { disks, tx } ) => { - self.use_only_these_disks(disks).await; - tx.send(Ok(())).map_err(|_| Error::FailedSendClientClosed) - }, None => { warn!(self.log, "InstanceManager's request channel closed; shutting down"); break; @@ -607,10 +602,10 @@ impl InstanceManagerRunner { nexus_client: self.nexus_client.clone(), vnic_allocator: self.vnic_allocator.clone(), port_manager: self.port_manager.clone(), - storage: self.storage.clone(), zone_bundler: self.zone_bundler.clone(), zone_builder_factory: self.zone_builder_factory.clone(), metrics_queue: self.metrics_queue.clone(), + available_datasets_rx: self.available_datasets_rx.clone(), }; let state = crate::instance::InstanceInitialState { @@ -760,24 +755,9 @@ impl InstanceManagerRunner { Ok(()) } - async fn use_only_these_disks(&mut self, disks: AllDisks) { - // Consider the generation number on the incoming request to avoid - // applying old requests. 
- let requested_generation = *disks.generation(); - if let Some(last_gen) = self.storage_generation { - if last_gen >= requested_generation { - // This request looks old, ignore it. - info!(self.log, "use_only_these_disks: Ignoring request"; - "last_gen" => ?last_gen, "requested_gen" => ?requested_generation); - return; - } - } - self.storage_generation = Some(requested_generation); - info!(self.log, "use_only_these_disks: Processing new request"; - "gen" => ?requested_generation); - - let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); - + async fn use_only_currently_managed_zpools(&mut self) { + let current_zpools = + self.currently_managed_zpools_rx.current_and_update(); let mut to_remove = vec![]; for (id, instance) in self.jobs.iter() { // If we can read the filesystem pool, consider it. Otherwise, move @@ -792,7 +772,7 @@ impl InstanceManagerRunner { info!(self.log, "use_only_these_disks: Cannot read filesystem pool"; "instance_id" => ?id); continue; }; - if !u2_set.contains(&filesystem_pool) { + if !current_zpools.contains(&filesystem_pool) { to_remove.push(*id); } } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 3a63ff5c117..4e7c27ea052 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -28,14 +28,12 @@ mod instance_manager; mod long_running_tasks; mod metrics; mod nexus; -pub mod params; mod probe_manager; mod profile; pub mod rack_setup; pub mod server; pub mod services; mod sled_agent; -mod storage_monitor; mod support_bundle; mod swap_device; mod updates; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 216961072e5..e05eb9aa417 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -20,39 +20,30 @@ use crate::config::Config; use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; use crate::zone_bundle::ZoneBundler; use bootstore::schemes::v0 as bootstore; -use illumos_utils::zpool::ZpoolName; use key_manager::{KeyManager, StorageKeyRequester}; +use sled_agent_config_reconciler::{ + ConfigReconcilerHandle, ConfigReconcilerSpawnToken, RawDisksSender, + TimeSyncConfig, +}; use sled_agent_types::zone_bundle::CleanupContext; -use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; +use sled_agent_zone_images::ZoneImageSourceResolver; use sled_hardware::{HardwareManager, SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; -use sled_storage::manager::{StorageHandle, StorageManager}; -use sled_storage::resources::AllDisks; use slog::{Logger, info}; use std::net::Ipv6Addr; +use std::sync::Arc; use tokio::sync::oneshot; /// A mechanism for interacting with all long running tasks that can be shared /// between the bootstrap-agent and sled-agent code. #[derive(Clone)] pub struct LongRunningTaskHandles { - /// A mechanism for retrieving storage keys. This interacts with the - /// [`KeyManager`] task. In the future, there may be other handles for - /// retrieving different types of keys. Separating the handles limits the - /// access for a given key type to the code that holds the handle. - pub storage_key_requester: StorageKeyRequester, - - /// A mechanism for talking to the [`StorageManager`] which is responsible - /// for establishing zpools on disks and managing their datasets. 
- pub storage_manager: StorageHandle, - - /// A mechanism for talking to the [`StorageMonitor`], which reacts to disk - /// changes and updates the dump devices. - pub storage_monitor_handle: StorageMonitorHandle, + /// A handle to the set of tasks managed by the sled-agent-config-reconciler + /// system. + pub config_reconciler: Arc, /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, @@ -79,15 +70,24 @@ pub async fn spawn_all_longrunning_tasks( config: &Config, ) -> ( LongRunningTaskHandles, + ConfigReconcilerSpawnToken, oneshot::Sender, oneshot::Sender, ) { let storage_key_requester = spawn_key_manager(log); - let mut storage_manager = - spawn_storage_manager(log, storage_key_requester.clone()); - let storage_monitor_handle = - spawn_storage_monitor(log, storage_manager.clone()); + let time_sync_config = if let Some(true) = config.skip_timesync { + TimeSyncConfig::Skip + } else { + TimeSyncConfig::Normal + }; + let (mut config_reconciler, config_reconciler_spawn_token) = + ConfigReconcilerHandle::new( + MountConfig::default(), + storage_key_requester, + time_sync_config, + log, + ); let nongimlet_observed_disks = config.nongimlet_observed_disks.clone().unwrap_or(vec![]); @@ -95,43 +95,39 @@ pub async fn spawn_all_longrunning_tasks( let hardware_manager = spawn_hardware_manager(log, sled_mode, nongimlet_observed_disks).await; - // Start monitoring for hardware changes + // Start monitoring for hardware changes, adding some synthetic disks if + // necessary. + let raw_disks_tx = config_reconciler.raw_disks_tx(); + upsert_synthetic_disks_if_needed(&log, &raw_disks_tx, &config).await; let (sled_agent_started_tx, service_manager_ready_tx) = - spawn_hardware_monitor(log, &hardware_manager, &storage_manager); - - // Add some synthetic disks if necessary. 
- upsert_synthetic_disks_if_needed(&log, &storage_manager, &config).await; + spawn_hardware_monitor(log, &hardware_manager, raw_disks_tx); // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent info!(log, "Waiting for boot disk"); - let (disk_id, boot_zpool) = storage_manager.wait_for_boot_disk().await; - info!(log, "Found boot disk {:?}", disk_id); + let internal_disks = config_reconciler.wait_for_boot_disk().await; + info!(log, "Found boot disk {:?}", internal_disks.boot_disk_id()); - let all_disks = storage_manager.get_latest_disks().await; let bootstore = spawn_bootstore_tasks( log, - &all_disks, + &config_reconciler, &hardware_manager, global_zone_bootstrap_ip, ) .await; - let zone_bundler = - spawn_zone_bundler_tasks(log, &mut storage_manager).await; - let zone_image_resolver = - make_zone_image_resolver(log, &all_disks, &boot_zpool); + let zone_bundler = spawn_zone_bundler_tasks(log, &config_reconciler).await; + let zone_image_resolver = ZoneImageSourceResolver::new(log, internal_disks); ( LongRunningTaskHandles { - storage_key_requester, - storage_manager, - storage_monitor_handle, + config_reconciler: Arc::new(config_reconciler), hardware_manager, bootstore, zone_bundler, zone_image_resolver, }, + config_reconciler_spawn_token, sled_agent_started_tx, service_manager_ready_tx, ) @@ -146,32 +142,6 @@ fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { storage_key_requester } -fn spawn_storage_manager( - log: &Logger, - key_requester: StorageKeyRequester, -) -> StorageHandle { - info!(log, "Starting StorageManager"); - let (manager, handle) = - StorageManager::new(log, MountConfig::default(), key_requester); - tokio::spawn(async move { - manager.run().await; - }); - handle -} - -fn spawn_storage_monitor( - log: &Logger, - storage_handle: StorageHandle, -) -> StorageMonitorHandle { - info!(log, "Starting StorageMonitor"); - let (storage_monitor, handle) = - StorageMonitor::new(log, MountConfig::default(), storage_handle); - tokio::spawn(async move { - storage_monitor.run().await; - }); - handle -} - async fn spawn_hardware_manager( log: &Logger, sled_mode: SledMode, @@ -197,11 +167,11 @@ async fn spawn_hardware_manager( fn spawn_hardware_monitor( log: &Logger, hardware_manager: &HardwareManager, - storage_handle: &StorageHandle, + raw_disks_tx: RawDisksSender, ) -> (oneshot::Sender, oneshot::Sender) { info!(log, "Starting HardwareMonitor"); let (mut monitor, sled_agent_started_tx, service_manager_ready_tx) = - HardwareMonitor::new(log, hardware_manager, storage_handle); + HardwareMonitor::new(log, hardware_manager, raw_disks_tx); tokio::spawn(async move { monitor.run().await; }); @@ -210,12 +180,16 @@ fn spawn_hardware_monitor( async fn spawn_bootstore_tasks( log: &Logger, - all_disks: &AllDisks, + config_reconciler: &ConfigReconcilerHandle, hardware_manager: &HardwareManager, global_zone_bootstrap_ip: Ipv6Addr, ) -> bootstore::NodeHandle { let config = new_bootstore_config( - all_disks, + &config_reconciler + .internal_disks_rx() + .current() + .all_cluster_datasets() + .collect::>(), hardware_manager.baseboard(), global_zone_bootstrap_ip, ) @@ -240,29 +214,22 @@ async fn spawn_bootstore_tasks( // `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely async fn spawn_zone_bundler_tasks( log: &Logger, - storage_handle: &mut StorageHandle, + config_reconciler: &ConfigReconcilerHandle, ) -> ZoneBundler { info!(log, "Starting ZoneBundler related tasks"); let log = log.new(o!("component" => 
"ZoneBundler")); - ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) - .await -} - -fn make_zone_image_resolver( - log: &Logger, - all_disks: &AllDisks, - boot_zpool: &ZpoolName, -) -> ZoneImageSourceResolver { - let zpools = ZoneImageZpools { - root: &all_disks.mount_config().root, - all_m2_zpools: all_disks.all_m2_zpools(), - }; - ZoneImageSourceResolver::new(log, &zpools, boot_zpool) + ZoneBundler::new( + log, + config_reconciler.internal_disks_rx().clone(), + config_reconciler.available_datasets_rx(), + CleanupContext::default(), + ) + .await } async fn upsert_synthetic_disks_if_needed( log: &Logger, - storage_manager: &StorageHandle, + raw_disks_tx: &RawDisksSender, config: &Config, ) { if let Some(vdevs) = &config.vdevs { @@ -275,7 +242,7 @@ async fn upsert_synthetic_disks_if_needed( let disk = RawSyntheticDisk::load(vdev, i.try_into().unwrap()) .expect("Failed to parse synthetic disk") .into(); - storage_manager.detected_raw_disk(disk).await.await.unwrap(); + raw_disks_tx.add_or_update_raw_disk(disk, log); } } } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs deleted file mode 100644 index 56411092a70..00000000000 --- a/sled-agent/src/params.rs +++ /dev/null @@ -1,70 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; -use omicron_common::disk::{DatasetKind, DatasetName}; -pub use sled_hardware::DendriteAsic; -use std::net::SocketAddrV6; - -/// Extension trait for `OmicronZoneType` and `OmicronZoneConfig`. -/// -/// This lives here because it requires extra dependencies that -/// nexus-sled-agent-shared doesn't have. -pub(crate) trait OmicronZoneTypeExt { - fn as_omicron_zone_type(&self) -> &OmicronZoneType; - - /// If this kind of zone has an associated dataset, return the dataset's name. - /// Otherwise, return `None`. - fn dataset_name(&self) -> Option { - self.dataset_name_and_address().map(|(name, _)| name) - } - - /// If this kind of zone has an associated dataset, return the dataset's name - /// and the associated "service address". Otherwise, return `None`. - fn dataset_name_and_address(&self) -> Option<(DatasetName, SocketAddrV6)> { - let (dataset, dataset_kind, address) = match self.as_omicron_zone_type() - { - OmicronZoneType::BoundaryNtp { .. } - | OmicronZoneType::InternalNtp { .. } - | OmicronZoneType::Nexus { .. } - | OmicronZoneType::Oximeter { .. } - | OmicronZoneType::CruciblePantry { .. } => None, - OmicronZoneType::Clickhouse { dataset, address, .. } => { - Some((dataset, DatasetKind::Clickhouse, address)) - } - OmicronZoneType::ClickhouseKeeper { dataset, address, .. } => { - Some((dataset, DatasetKind::ClickhouseKeeper, address)) - } - OmicronZoneType::ClickhouseServer { dataset, address, .. } => { - Some((dataset, DatasetKind::ClickhouseServer, address)) - } - OmicronZoneType::CockroachDb { dataset, address, .. } => { - Some((dataset, DatasetKind::Cockroach, address)) - } - OmicronZoneType::Crucible { dataset, address, .. } => { - Some((dataset, DatasetKind::Crucible, address)) - } - OmicronZoneType::ExternalDns { dataset, http_address, .. } => { - Some((dataset, DatasetKind::ExternalDns, http_address)) - } - OmicronZoneType::InternalDns { dataset, http_address, .. 
} => { - Some((dataset, DatasetKind::InternalDns, http_address)) - } - }?; - - Some((DatasetName::new(dataset.pool_name, dataset_kind), *address)) - } -} - -impl OmicronZoneTypeExt for OmicronZoneType { - fn as_omicron_zone_type(&self) -> &OmicronZoneType { - self - } -} - -impl OmicronZoneTypeExt for OmicronZoneConfig { - fn as_omicron_zone_type(&self) -> &OmicronZoneType { - &self.zone_type - } -} diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 75729e3a872..c4f97ed22ac 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -10,8 +10,8 @@ use nexus_client::types::{ BackgroundTasksActivateRequest, ProbeExternalIp, ProbeInfo, }; use omicron_common::api::external::{ - Generation, VpcFirewallRuleAction, VpcFirewallRuleDirection, - VpcFirewallRulePriority, VpcFirewallRuleStatus, + VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, + VpcFirewallRuleStatus, }; use omicron_common::api::internal::shared::{ NetworkInterface, ResolvedVpcFirewallRule, @@ -19,9 +19,10 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid}; use rand::SeedableRng; use rand::prelude::IteratorRandom; -use sled_storage::dataset::ZONE_DATASET; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; +use sled_agent_config_reconciler::{ + AvailableDatasetsReceiver, CurrentlyManagedZpools, + CurrentlyManagedZpoolsReceiver, +}; use slog::{Logger, error, warn}; use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -50,7 +51,6 @@ pub(crate) struct ProbeManager { } struct RunningProbes { - storage_generation: Option, zones: HashMap, } @@ -60,10 +60,10 @@ pub(crate) struct ProbeManagerInner { log: Logger, sled_id: Uuid, vnic_allocator: VnicAllocator, - storage: StorageHandle, port_manager: PortManager, metrics_queue: MetricsRequestQueue, running_probes: Mutex, + available_datasets_rx: AvailableDatasetsReceiver, zones_api: Arc, } @@ -73,9 +73,9 @@ impl ProbeManager { sled_id: Uuid, nexus_client: NexusClient, etherstub: Etherstub, - storage: StorageHandle, port_manager: PortManager, metrics_queue: MetricsRequestQueue, + available_datasets_rx: AvailableDatasetsReceiver, log: Logger, ) -> Self { Self { @@ -87,69 +87,24 @@ impl ProbeManager { Arc::new(illumos_utils::dladm::Dladm::real_api()), ), running_probes: Mutex::new(RunningProbes { - storage_generation: None, zones: HashMap::new(), }), nexus_client, log, sled_id, - storage, port_manager, metrics_queue, + available_datasets_rx, zones_api: Arc::new(illumos_utils::zone::Zones::real_api()), }), } } - pub(crate) async fn run(&self) { - self.inner.run().await; - } - - /// Removes any probes using filesystem roots on zpools that are not - /// contained in the set of "disks". - pub(crate) async fn use_only_these_disks(&self, disks: &AllDisks) { - let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); - let mut probes = self.inner.running_probes.lock().await; - - // Consider the generation number on the incoming request to avoid - // applying old requests. - let requested_generation = *disks.generation(); - if let Some(last_gen) = probes.storage_generation { - if last_gen >= requested_generation { - // This request looks old, ignore it. 
- info!(self.inner.log, "use_only_these_disks: Ignoring request"; - "last_gen" => ?last_gen, "requested_gen" => ?requested_generation); - return; - } - } - probes.storage_generation = Some(requested_generation); - info!(self.inner.log, "use_only_these_disks: Processing new request"; - "gen" => ?requested_generation); - - let to_remove = probes - .zones - .iter() - .filter_map(|(id, probe)| { - let probe_pool = match probe.root_zpool() { - ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, - ZpoolOrRamdisk::Ramdisk => { - info!( - self.inner.log, - "use_only_these_disks: removing probe on ramdisk"; - "id" => ?id, - ); - return None; - } - }; - - if !u2_set.contains(probe_pool) { Some(*id) } else { None } - }) - .collect::>(); - - for probe_id in to_remove { - info!(self.inner.log, "use_only_these_disks: Removing probe"; "probe_id" => ?probe_id); - self.inner.remove_probe_locked(&mut probes, probe_id).await; - } + pub(crate) async fn run( + &self, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) { + self.inner.run(currently_managed_zpools_rx).await; } } @@ -214,18 +169,55 @@ impl TryFrom for ProbeState { impl ProbeManagerInner { /// Run the probe manager. If it's already running this is a no-op. - async fn run(self: &Arc) { + async fn run( + self: &Arc, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) { let mut join_handle = self.join_handle.lock().await; if join_handle.is_none() { - *join_handle = Some(self.clone().reconciler()) + *join_handle = + Some(self.clone().reconciler(currently_managed_zpools_rx)) } } /// Run the reconciler loop on a background thread. - fn reconciler(self: Arc) -> JoinHandle<()> { + fn reconciler( + self: Arc, + mut currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) -> JoinHandle<()> { tokio::spawn(async move { loop { - sleep(RECONCILIATION_INTERVAL).await; + let sleep_fut = sleep(RECONCILIATION_INTERVAL); + tokio::pin!(sleep_fut); + + // Wait until the next reconciliation tick, but handle any + // changes to the set of disks in the meantime. + loop { + tokio::select! { + _ = &mut sleep_fut => break, + + // Cancel-safe per docs on `changed()` + result = currently_managed_zpools_rx.changed() => { + match result { + Ok(()) => { + self.use_only_these_disks( + ¤tly_managed_zpools_rx + .current_and_update() + ).await; + continue; + } + Err(_) => { + warn!( + self.log, + "ProbeManager's 'current zpools' \ + channel closed; shutting down", + ); + return; + } + } + } + } + } // Collect the target and current state. Use set operations // to determine what probes need to be added, removed and/or @@ -268,6 +260,37 @@ impl ProbeManagerInner { }) } + /// Removes any probes using filesystem roots on zpools that are not + /// contained in the set of "disks". + async fn use_only_these_disks(&self, disks: &CurrentlyManagedZpools) { + let mut probes = self.running_probes.lock().await; + + let to_remove = probes + .zones + .iter() + .filter_map(|(id, probe)| { + let probe_pool = match probe.root_zpool() { + ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, + ZpoolOrRamdisk::Ramdisk => { + info!( + self.log, + "use_only_these_disks: removing probe on ramdisk"; + "id" => ?id, + ); + return None; + } + }; + + if !disks.contains(probe_pool) { Some(*id) } else { None } + }) + .collect::>(); + + for probe_id in to_remove { + info!(self.log, "use_only_these_disks: Removing probe"; "probe_id" => ?probe_id); + self.remove_probe_locked(&mut probes, probe_id).await; + } + } + /// Add a set of probes to this sled. 
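
The probe-manager loop above mixes a periodic reconciliation tick with updates arriving on a watch channel. A standalone sketch of that pattern, with hypothetical reconcile/apply_update stand-ins:

use std::time::Duration;
use tokio::sync::watch;
use tokio::time::sleep;

async fn run(mut rx: watch::Receiver<u32>) {
    loop {
        let tick = sleep(Duration::from_secs(30));
        tokio::pin!(tick);

        // Until the next tick fires, handle any updates pushed on the watch
        // channel; `changed()` is cancel-safe, so it can sit in a select!
        // without losing notifications.
        loop {
            tokio::select! {
                _ = &mut tick => break,
                changed = rx.changed() => {
                    match changed {
                        Ok(()) => {
                            let value = *rx.borrow_and_update();
                            apply_update(value);
                        }
                        // All senders dropped; nothing more will arrive.
                        Err(_) => return,
                    }
                }
            }
        }

        reconcile().await;
    }
}

fn apply_update(_value: u32) { /* hypothetical: react to the new value */ }
async fn reconcile() { /* hypothetical: periodic reconciliation pass */ }
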
/// /// Returns the number of inserted probes. @@ -291,12 +314,9 @@ impl ProbeManagerInner { /// boots the probe zone. async fn add_probe(self: &Arc, probe: &ProbeState) -> Result<()> { let mut rng = rand::rngs::StdRng::from_entropy(); - let current_disks = self - .storage - .get_latest_disks() - .await - .all_u2_mountpoints(ZONE_DATASET); - let zone_root_path = current_disks + let zone_root_path = self + .available_datasets_rx + .all_mounted_zone_root_datasets() .into_iter() .choose(&mut rng) .ok_or_else(|| anyhow!("u2 not found"))?; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1fa6fb80e62..3b38b57dd71 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -89,7 +89,8 @@ use nexus_client::{ Client as NexusClient, Error as NexusError, types as NexusTypes, }; use nexus_sled_agent_shared::inventory::{ - OmicronSledConfig, OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, + ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, + OmicronZoneType, OmicronZonesConfig, }; use nexus_types::deployment::{ Blueprint, BlueprintDatasetConfig, BlueprintDatasetDisposition, @@ -117,6 +118,7 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ Client as SledAgentClient, Error as SledAgentError, types as SledAgentTypes, }; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; @@ -127,8 +129,6 @@ use sled_agent_types::rack_ops::RssStep; use sled_agent_types::sled::StartSledAgentRequest; use sled_agent_types::time_sync::TimeSync; use sled_hardware_types::underlay::BootstrapInterface; -use sled_storage::dataset::CONFIG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::collections::{BTreeMap, BTreeSet, btree_map}; @@ -201,6 +201,9 @@ pub enum SetupServiceError { #[error("Error making HTTP request to Sled Agent: {0}")] SledApi(#[from] SledAgentError), + #[error("Sled config not yet reconciled: {0}")] + ConfigNotYetReconciled(String), + #[error("Error making HTTP request to Nexus: {0}")] NexusApi(#[from] NexusError), @@ -257,15 +260,14 @@ impl RackSetupService { /// Arguments: /// - `log`: The logger. /// - `config`: The config file, which is used to setup the rack. 
- /// - `storage_manager`: A handle for interacting with the storage manager - /// task + /// - `internal_disks_rx`: Tells us about available internal disks /// - `local_bootstrap_agent`: Communication channel by which we can send /// commands to our local bootstrap-agent (e.g., to start sled-agents) /// - `bootstore` - A handle to call bootstore APIs pub(crate) fn new( log: Logger, config: Config, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, @@ -275,7 +277,7 @@ impl RackSetupService { if let Err(e) = svc .run( &config, - &storage_manager, + &internal_disks_rx, local_bootstrap_agent, bootstore, step_tx, @@ -375,86 +377,9 @@ impl ServiceInner { "datasets" => ?sled_config.datasets, "zones" => ?sled_config.zones, ); - let result = client.omicron_config_put(&sled_config).await; - let error = match result { - Ok(response) => { - let response = response.into_inner(); - - // An HTTP OK may contain _partial_ success: check whether - // we got any individual disk failures, and split those out - // into transient/permanent cases based on whether they - // indicate we should retry. - let disk_errors = - response.disks.into_iter().filter_map(|status| { - status.err.map(|err| (status.identity, err)) - }); - let mut transient_errors = Vec::new(); - let mut permanent_errors = Vec::new(); - for (identity, error) in disk_errors { - if error.retryable() { - transient_errors.push(format!( - "Retryable error initializing disk \ - {} / {} / {}: {}", - identity.vendor, - identity.model, - identity.serial, - InlineErrorChain::new(&error) - )); - } else { - permanent_errors.push(format!( - "Non-retryable error initializing disk \ - {} / {} / {}: {}", - identity.vendor, - identity.model, - identity.serial, - InlineErrorChain::new(&error) - )); - } - } - if !permanent_errors.is_empty() { - return Err(BackoffError::permanent( - SetupServiceError::DiskInitializationPermanent { - permanent_errors, - }, - )); - } - if !transient_errors.is_empty() { - return Err(BackoffError::transient( - SetupServiceError::DiskInitializationTransient { - transient_errors, - }, - )); - } - - // No individual disk errors reported; all disks were - // initialized. Check for any dataset errors; these are not - // retryable. - let dataset_errors = response - .datasets - .into_iter() - .filter_map(|status| { - status.err.map(|err| { - format!( - "Error initializing dataset {}: {err}", - status.dataset_name.full_name() - ) - }) - }) - .collect::>(); - if !dataset_errors.is_empty() { - return Err(BackoffError::permanent( - SetupServiceError::DatasetInitialization { - errors: dataset_errors, - }, - )); - } - - // No individual dataset errors reported. We don't get - // status for individual zones (any failure there results in - // an HTTP-level error), so everything is good. - return Ok(()); - } - Err(error) => error, + let Err(error) = client.omicron_config_put(&sled_config).await + else { + return Ok(()); }; if let sled_agent_client::Error::ErrorResponse(response) = &error { @@ -502,6 +427,130 @@ impl ServiceInner { Ok(()) } + // Wait until the config reconciler on the target sled has successfully + // reconciled the config at `generation`. 
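
The helper that follows polls sled inventory with the backoff primitives this file already uses (retry_notify, retry_policy_internal_service_aggressive, BackoffError). A condensed sketch of that polling shape, with a hypothetical fetch function standing in for the inventory call:

use omicron_common::api::external::Generation;
use omicron_common::backoff::{
    BackoffError, retry_notify, retry_policy_internal_service_aggressive,
};

async fn wait_for_generation(target: Generation) -> Result<(), String> {
    let attempt = || async {
        let seen = fetch_reconciled_generation()
            .await
            // Treat request failures as retryable.
            .map_err(BackoffError::transient)?;
        if seen < target {
            // Not there yet; retry with backoff.
            return Err(BackoffError::transient(format!(
                "reconciled generation {seen} < target {target}"
            )));
        }
        Ok(())
    };
    let log_failure = |error: String, delay| {
        eprintln!("not yet reconciled: {error} (retrying in {delay:?})");
    };
    retry_notify(
        retry_policy_internal_service_aggressive(),
        attempt,
        log_failure,
    )
    .await
}

async fn fetch_reconciled_generation() -> Result<Generation, String> {
    // Hypothetical: in the real code this comes from the sled-agent
    // inventory endpoint.
    Ok(Generation::new())
}
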
+ async fn wait_for_config_reconciliation_on_sled( + &self, + sled_address: SocketAddrV6, + generation: Generation, + ) -> Result<(), SetupServiceError> { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .build() + .map_err(SetupServiceError::HttpClient)?; + let log = self.log.new(o!("sled_address" => sled_address.to_string())); + let client = SledAgentClient::new_with_client( + &format!("http://{}", sled_address), + client, + log.clone(), + ); + + let inv_check = || async { + info!(log, "attempting to read sled's inventory"); + let inventory = match client.inventory().await { + Ok(response) => response.into_inner(), + Err(error) => { + // TODO Many other codes here should not be retried. See + // omicron#4578. + return Err(BackoffError::transient( + SetupServiceError::SledApi(error), + )); + } + }; + + // Has this sled's reconciler run at all? + let Some(last_reconciliation) = inventory.last_reconciliation + else { + return Err(BackoffError::transient( + SetupServiceError::ConfigNotYetReconciled( + "no reconcilation state available".to_string(), + ), + )); + }; + + // Has it attempted to reconcile our target generation? + let reconciled_gen = + last_reconciliation.last_reconciled_config.generation; + + if reconciled_gen < generation { + return Err(BackoffError::transient( + SetupServiceError::ConfigNotYetReconciled(format!( + "reconciled generation {reconciled_gen} lower than \ + desired generation {generation}", + )), + )); + } + + // Were there any errors during reconciliation? Check for disk, + // dataset, and zone errors. + let mut errors = Vec::new(); + + for (disk_id, result) in &last_reconciliation.external_disks { + match result { + ConfigReconcilerInventoryResult::Ok => (), + ConfigReconcilerInventoryResult::Err { message } => { + errors.push(format!( + "reconcilation for disk {disk_id} failed: {message}" + )); + } + } + } + for (dataset_id, result) in &last_reconciliation.datasets { + match result { + ConfigReconcilerInventoryResult::Ok => (), + ConfigReconcilerInventoryResult::Err { message } => { + errors.push(format!( + "reconcilation for dataset {dataset_id} failed: \ + {message}" + )); + } + } + } + for (zone_id, result) in &last_reconciliation.zones { + match result { + ConfigReconcilerInventoryResult::Ok => (), + ConfigReconcilerInventoryResult::Err { message } => { + errors.push(format!( + "reconcilation for zone {zone_id} failed: {message}" + )); + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + // We treat all of these as transient (although it's possible + // some are permanent - we have no means for recovering from + // permanent errors during RSS other than clean-slating and + // starting over, so it's safer to treat these as transient and + // let operators investigate if things are stuck). + Err(BackoffError::transient( + SetupServiceError::ConfigNotYetReconciled( + errors.join(", "), + ), + )) + } + }; + let log_failure = |error, delay| { + warn!( + log, + "sled config not yet reconciled"; + "error" => #%error, + "retry_after" => ?delay, + ); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + inv_check, + log_failure, + ) + .await?; + + Ok(()) + } + // Ensure that the desired sled configuration for a particular zone version // is deployed. // @@ -530,21 +579,27 @@ impl ServiceInner { })? .clone(); + // We bump the zone generation as we step through phases of + // RSS; use that as the overall sled config generation. 
+ let generation = zones_config.generation; let sled_config = OmicronSledConfig { - // We bump the zone generation as we step through phases of - // RSS; use that as the overall sled config generation. - generation: zones_config.generation, + generation, disks: config .disks .iter() .map(|c| c.clone().into()) .collect(), datasets: config.datasets.values().cloned().collect(), - zones: zones_config.zones.iter().cloned().collect(), + zones: zones_config.zones.into_iter().collect(), remove_mupdate_override: None, }; self.set_config_on_sled(*sled_address, sled_config).await?; + self.wait_for_config_reconciliation_on_sled( + *sled_address, + generation, + ) + .await?; Ok::<(), SetupServiceError>(()) }), @@ -1121,7 +1176,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_manager: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, @@ -1134,19 +1189,18 @@ impl ServiceInner { config.az_subnet(), )?; - let started_marker_paths: Vec = storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let config_dataset_paths = internal_disks_rx + .current() + .all_config_datasets() + .collect::>(); + + let started_marker_paths: Vec = config_dataset_paths + .iter() .map(|p| p.join(RSS_STARTED_FILENAME)) .collect(); - let completed_marker_paths: Vec = storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let completed_marker_paths: Vec = config_dataset_paths + .iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index f60f367d280..15a5c2f784d 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -12,6 +12,7 @@ use crate::nexus::make_nexus_client; use crate::services::ServiceManager; use internal_dns_resolver::Resolver; use omicron_uuid_kinds::SledUuid; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::net::SocketAddr; @@ -39,6 +40,7 @@ impl Server { log: Logger, request: StartSledAgentRequest, long_running_tasks_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, services: ServiceManager, ) -> Result { info!(log, "setting up sled agent server"); @@ -61,6 +63,7 @@ impl Server { request, services, long_running_tasks_handles, + config_reconciler_spawn_token, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 84a4e33e2ce..a60da9210db 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,12 +20,11 @@ //! of what other services Nexus wants to have executing on the sled. //! //! To accomplish this, the following interfaces are exposed: -//! - [ServiceManager::ensure_all_omicron_zones_persistent] exposes an API to -//! request a set of Omicron zones that should persist beyond reboot. +//! - [ServiceManager::start_omicron_zone] exposes an API to start a new Omicron +//! zone. //! - [ServiceManager::activate_switch] exposes an API to specifically enable //! or disable (via [ServiceManager::deactivate_switch]) the switch zone. 
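
Given the narrower ServiceManager API described above, pre-start validation now lives with the caller. A hypothetical caller-side sketch; only start_omicron_zone, requires_timesync, and zone_name come from the real code, and the import paths are assumptions:

use illumos_utils::running_zone::RunningZone;
use illumos_utils::zpool::PathInPool;
use nexus_sled_agent_shared::inventory::OmicronZoneConfig;

// Assumed to live next to `ServiceManager` in sled-agent/src/services.rs.
async fn maybe_start_zone(
    mgr: &ServiceManager,
    zone: &OmicronZoneConfig,
    zone_root_path: PathInPool,
    time_is_synced: bool,
) -> anyhow::Result<RunningZone> {
    // The caller, not ServiceManager, now enforces timesync requirements...
    if zone.zone_type.requires_timesync() && !time_is_synced {
        anyhow::bail!("zone {} requires timesync", zone.zone_name());
    }
    // ...and has already chosen and validated `zone_root_path` against the
    // currently-managed zpools, so ServiceManager only has to boot the zone.
    mgr.start_omicron_zone(zone, zone_root_path)
        .await
        .map_err(|err| anyhow::anyhow!("failed to start zone: {err}"))
}
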
-use crate::artifact_store::ArtifactStore; use crate::bootstrap::BootstrapNetworking; use crate::bootstrap::early_networking::{ EarlyNetworkSetup, EarlyNetworkSetupError, @@ -33,9 +32,7 @@ use crate::bootstrap::early_networking::{ use crate::config::SidecarRevision; use crate::ddm_reconciler::DdmReconciler; use crate::metrics::MetricsRequestQueue; -use crate::params::{DendriteAsic, OmicronZoneTypeExt}; use crate::profile::*; -use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; use clickhouse_admin_types::CLICKHOUSE_KEEPER_CONFIG_DIR; @@ -65,11 +62,9 @@ use illumos_utils::{PFEXEC, execute}; use internal_dns_resolver::Resolver; use internal_dns_types::names::BOUNDARY_NTP_DNS_NAME; use internal_dns_types::names::DNS_ZONE; -use itertools::Itertools; use nexus_config::{ConfigDropshotWithTls, DeploymentConfig}; use nexus_sled_agent_shared::inventory::{ - OmicronZoneConfig, OmicronZoneImageSource, OmicronZoneType, - OmicronZonesConfig, ZoneKind, + OmicronZoneConfig, OmicronZoneImageSource, OmicronZoneType, ZoneKind, }; use omicron_common::address::AZ_PREFIX; use omicron_common::address::DENDRITE_PORT; @@ -94,41 +89,30 @@ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; use omicron_common::disk::{DatasetKind, DatasetName}; -use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::OmicronZoneUuid; -use rand::prelude::SliceRandom; -use sled_agent_types::{ - sled::SWITCH_ZONE_BASEBOARD_FILE, time_sync::TimeSync, - zone_bundle::ZoneBundleCause, -}; -use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; +use sled_agent_config_reconciler::InternalDisksReceiver; +use sled_agent_types::sled::SWITCH_ZONE_BASEBOARD_FILE; +use sled_agent_zone_images::ZoneImageSourceResolver; +use sled_hardware::DendriteAsic; use sled_hardware::SledMode; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware_types::Baseboard; -use sled_storage::config::MountConfig; -use sled_storage::dataset::{CONFIG_DATASET, ZONE_DATASET}; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::collections::BTreeMap; -use std::collections::HashSet; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; -use std::str::FromStr; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; -use tokio::sync::{MutexGuard, oneshot}; +use tokio::sync::oneshot; use tokio::task::JoinHandle; use tufaceous_artifact::ArtifactHash; use uuid::Uuid; use illumos_utils::zone::Zones; -const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED); - // These are all the same binary. They just reside at different paths. 
const CLICKHOUSE_SERVER_BINARY: &str = "/opt/oxide/clickhouse_server/clickhouse"; @@ -157,15 +141,9 @@ pub enum Error { #[error("Failed to find device {device}")] MissingDevice { device: String }, - #[error("Failed to access ledger: {0}")] - Ledger(#[from] ledger::Error), - #[error("Sled Agent not initialized yet")] SledAgentNotReady, - #[error("No U.2 devices found with a {ZONE_DATASET} mountpoint")] - U2NotFound, - #[error("Switch zone error: {0}")] SwitchZone(anyhow::Error), @@ -208,9 +186,6 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), - #[error("Failed to initialize zones: {errors:?}")] - ZoneEnsure { errors: Vec<(String, Error)> }, - #[error("Error contacting ddmd: {0}")] DdmError(#[from] DdmError), @@ -241,34 +216,9 @@ pub enum Error { #[error("Failed to get address: {0}")] GetAddressFailure(#[from] illumos_utils::zone::GetAddressError), - #[error( - "Failed to launch zone {zone} because ZFS value cannot be accessed" - )] - GetZfsValue { - zone: String, - #[source] - source: illumos_utils::zfs::GetValueError, - }, - - #[error( - "Cannot launch {zone} with {dataset} (saw {prop_name} = {prop_value}, expected {prop_value_expected})" - )] - DatasetNotReady { - zone: String, - dataset: String, - prop_name: String, - prop_value: String, - prop_value_expected: String, - }, - #[error("NTP zone not ready")] NtpZoneNotReady, - // This isn't exactly "NtpZoneNotReady" -- it can happen when the NTP zone - // is up, but time is still in the process of synchronizing. - #[error("Time not yet synchronized")] - TimeNotSynchronized, - #[error("Execution error: {0}")] ExecutionError(#[from] illumos_utils::ExecutionError), @@ -347,39 +297,6 @@ impl From for omicron_common::api::external::Error { Error::RequestedZoneConfigOutdated { .. } => { omicron_common::api::external::Error::conflict(&err.to_string()) } - Error::TimeNotSynchronized => { - omicron_common::api::external::Error::unavail(&err.to_string()) - } - Error::ZoneEnsure { errors } => { - // As a special case, if any zones failed to timesync, - // prioritize that error. - // - // This conversion to a 503 error was requested in - // https://github.com/oxidecomputer/omicron/issues/4776 , - // and we preserve that behavior here, even though we may - // launch many zones at the same time. - if let Some(err) = errors.iter().find_map(|(_, err)| { - if matches!(err, Error::TimeNotSynchronized) { - Some(err) - } else { - None - } - }) { - omicron_common::api::external::Error::unavail( - &err.to_string(), - ) - } else { - let internal_message = errors - .iter() - .map(|(name, err)| { - format!("failed to start {name}: {err:?}") - }) - .join("\n"); - omicron_common::api::external::Error::InternalError { - internal_message, - } - } - } _ => omicron_common::api::external::Error::InternalError { internal_message: err.to_string(), }, @@ -387,14 +304,6 @@ impl From for omicron_common::api::external::Error { } } -/// Result of [ServiceManager::load_services] -pub enum LoadServicesResult { - /// We didn't load anything, there wasn't anything to load - NoServicesToLoad, - /// We successfully loaded the zones from our ledger. - ServicesLoaded, -} - fn display_zone_init_errors(errors: &[(String, Box)]) -> String { if errors.len() == 1 { return format!( @@ -513,99 +422,6 @@ impl RealSystemApi { impl SystemApi for RealSystemApi {} -// The filename of the ledger, within the provided directory. 
-const ZONES_LEDGER_FILENAME: &str = "omicron-zones.json"; - -/// Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus -/// wants for all of its zones) with the locally-determined configuration for -/// these zones. -#[derive( - Clone, - Debug, - Eq, - PartialEq, - serde::Serialize, - serde::Deserialize, - schemars::JsonSchema, -)] -pub struct OmicronZonesConfigLocal { - /// generation of the Omicron-provided part of the configuration - /// - /// This generation number is outside of Sled Agent's control. We store - /// exactly what we were given and use this number to decide when to - /// fail requests to establish an outdated configuration. - /// - /// You can think of this as a major version number, with - /// `ledger_generation` being a minor version number. See - /// `is_newer_than()`. - pub omicron_generation: Generation, - - /// ledger-managed generation number - /// - /// This generation is managed by the ledger facility itself. It's bumped - /// whenever we write a new ledger. In practice, we don't currently have - /// any reason to bump this _for a given Omicron generation_ so it's - /// somewhat redundant. In principle, if we needed to modify the ledgered - /// configuration due to some event that doesn't change the Omicron config - /// (e.g., if we wanted to move the root filesystem to a different path), we - /// could do that by bumping this generation. - pub ledger_generation: Generation, - pub zones: Vec, -} - -impl Ledgerable for OmicronZonesConfigLocal { - fn is_newer_than(&self, other: &OmicronZonesConfigLocal) -> bool { - self.omicron_generation > other.omicron_generation - || (self.omicron_generation == other.omicron_generation - && self.ledger_generation >= other.ledger_generation) - } - - fn generation_bump(&mut self) { - self.ledger_generation = self.ledger_generation.next(); - } -} - -impl OmicronZonesConfigLocal { - /// Returns the initial configuration for generation 1, which has no zones - pub fn initial() -> OmicronZonesConfigLocal { - OmicronZonesConfigLocal { - omicron_generation: Generation::new(), - ledger_generation: Generation::new(), - zones: vec![], - } - } - - pub fn to_omicron_zones_config(self) -> OmicronZonesConfig { - OmicronZonesConfig { - generation: self.omicron_generation, - zones: self.zones.into_iter().map(|z| z.zone).collect(), - } - } -} - -/// Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus -/// wants for this zone) with any locally-determined configuration (like the -/// path to the root filesystem) -// -// NOTE: Although the path to the root filesystem is not exactly equal to the -// ZpoolName, it is derivable from it, and the ZpoolName for the root filesystem -// is now being supplied as a part of OmicronZoneConfig. Therefore, this struct -// is less necessary than it has been historically. 
-#[derive( - Clone, - Debug, - Eq, - PartialEq, - serde::Serialize, - serde::Deserialize, - schemars::JsonSchema, -)] -pub struct OmicronZoneConfigLocal { - pub zone: OmicronZoneConfig, - #[schemars(with = "String")] - pub root: Utf8PathBuf, -} - /// Describes how we want a switch zone to be configured /// /// This is analogous to `OmicronZoneConfig`, but for the switch zone (which is @@ -660,7 +476,7 @@ impl illumos_utils::smf_helper::Service for SwitchService { /// Describes either an Omicron-managed zone or the switch zone, used for /// functions that operate on either one or the other enum ZoneArgs<'a> { - Omicron(&'a OmicronZoneConfigLocal), + Omicron(&'a OmicronZoneConfig), Switch(&'a SwitchZoneConfig), } @@ -668,7 +484,7 @@ impl<'a> ZoneArgs<'a> { /// If this is an Omicron zone, return its type pub fn omicron_type(&self) -> Option<&'a OmicronZoneType> { match self { - ZoneArgs::Omicron(zone_config) => Some(&zone_config.zone.zone_type), + ZoneArgs::Omicron(zone_config) => Some(&zone_config.zone_type), ZoneArgs::Switch(_) => None, } } @@ -729,57 +545,23 @@ enum SwitchZoneState { }, } -// The return type for `start_omicron_zones`. -// -// When multiple zones are started concurrently, some can fail while others -// succeed. This structure allows the function to return this nuanced -// information. -#[must_use] -struct StartZonesResult { - // The set of zones which have successfully started. - new_zones: Vec, - - // The set of (zone name, error) of zones that failed to start. - errors: Vec<(String, Error)>, -} - -// A running zone and the configuration which started it. -#[derive(Debug)] -struct OmicronZone { - runtime: RunningZone, - config: OmicronZoneConfigLocal, -} - -impl OmicronZone { - fn name(&self) -> &str { - self.runtime.name() - } -} - -type ZoneMap = BTreeMap; - /// Manages miscellaneous Sled-local services. pub struct ServiceManagerInner { log: Logger, global_zone_bootstrap_link_local_address: Ipv6Addr, switch_zone: Mutex, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, time_synced: AtomicBool, switch_zone_maghemite_links: Vec, sidecar_revision: SidecarRevision, - // Zones representing running services - zones: Mutex, underlay_vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, bootstrap_vnic_allocator: VnicAllocator, ddm_reconciler: DdmReconciler, sled_info: OnceLock, switch_zone_bootstrap_address: Ipv6Addr, - storage: StorageHandle, - zone_bundler: ZoneBundler, zone_image_resolver: ZoneImageSourceResolver, - ledger_directory_override: OnceLock, + internal_disks_rx: InternalDisksReceiver, system_api: Box, } @@ -795,16 +577,6 @@ struct SledAgentInfo { metrics_queue: MetricsRequestQueue, } -pub(crate) enum TimeSyncConfig { - // Waits for NTP to confirm that time has been synchronized. - Normal, - // Skips timesync unconditionally. - Skip, - // Fails timesync unconditionally. - #[cfg(all(test, target_os = "illumos"))] - Fail, -} - #[derive(Clone)] pub struct ServiceManager { inner: Arc, @@ -909,39 +681,35 @@ impl ServiceManager { /// /// Args: /// - `log`: The logger - /// - `ddm_client`: Client pointed to our localhost ddmd + /// - `ddm_reconciler`: Handle for configuring our localhost ddmd /// - `bootstrap_networking`: Collection of etherstubs/VNICs set up when /// bootstrap agent begins /// - `sled_mode`: The sled's mode of operation (Gimlet vs Scrimlet). - /// - `time_sync_config`: Describes how the sled awaits synced time. /// - `sidecar_revision`: Rev of attached sidecar, if present. 
/// - `switch_zone_maghemite_links`: List of physical links on which /// maghemite should listen. - /// - `storage`: Shared handle to get the current state of disks/zpools. + /// - `zone_image_resolver`: how to find Omicron zone images + /// - `internal_disks_rx`: watch channel for changes to internal disks #[allow(clippy::too_many_arguments)] pub(crate) fn new( log: &Logger, ddm_reconciler: DdmReconciler, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageHandle, - zone_bundler: ZoneBundler, zone_image_resolver: ZoneImageSourceResolver, + internal_disks_rx: InternalDisksReceiver, ) -> Self { Self::new_inner( log, ddm_reconciler, bootstrap_networking, sled_mode, - time_sync_config, sidecar_revision, switch_zone_maghemite_links, - storage, - zone_bundler, zone_image_resolver, + internal_disks_rx, RealSystemApi::new(), ) } @@ -952,12 +720,10 @@ impl ServiceManager { ddm_reconciler: DdmReconciler, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageHandle, - zone_bundler: ZoneBundler, zone_image_resolver: ZoneImageSourceResolver, + internal_disks_rx: InternalDisksReceiver, system_api: Box, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); @@ -971,11 +737,9 @@ impl ServiceManager { // Load the switch zone if it already exists? switch_zone: Mutex::new(SwitchZoneState::Disabled), sled_mode, - time_sync_config, time_synced: AtomicBool::new(false), sidecar_revision, switch_zone_maghemite_links, - zones: Mutex::new(BTreeMap::new()), underlay_vnic_allocator: VnicAllocator::new( "Service", bootstrap_networking.underlay_etherstub, @@ -991,25 +755,13 @@ impl ServiceManager { sled_info: OnceLock::new(), switch_zone_bootstrap_address: bootstrap_networking .switch_zone_bootstrap_ip, - storage, - zone_bundler, zone_image_resolver, - ledger_directory_override: OnceLock::new(), + internal_disks_rx, system_api, }), } } - #[cfg(all(test, target_os = "illumos"))] - fn override_ledger_directory(&self, path: Utf8PathBuf) { - self.inner.ledger_directory_override.set(path).unwrap(); - } - - #[cfg(all(test, target_os = "illumos"))] - fn override_image_directory(&self, path: Utf8PathBuf) { - self.inner.zone_image_resolver.override_image_directory(path); - } - pub(crate) fn ddm_reconciler(&self) -> &DdmReconciler { &self.inner.ddm_reconciler } @@ -1018,126 +770,6 @@ impl ServiceManager { self.inner.switch_zone_bootstrap_address } - // TODO: This function refers to an old, deprecated format for storing - // service information. It is not deprecated for cleanup purposes, but - // should otherwise not be called in new code. 
- async fn all_service_ledgers(&self) -> Vec { - pub const SERVICES_LEDGER_FILENAME: &str = "services.json"; - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(SERVICES_LEDGER_FILENAME)]; - } - let resources = self.inner.storage.get_latest_disks().await; - resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() - .map(|p| p.join(SERVICES_LEDGER_FILENAME)) - .collect() - } - - async fn all_omicron_zone_ledgers(&self) -> Vec { - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(ZONES_LEDGER_FILENAME)]; - } - let resources = self.inner.storage.get_latest_disks().await; - resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() - .map(|p| p.join(ZONES_LEDGER_FILENAME)) - .collect() - } - - // Loads persistent configuration about any Omicron-managed zones that we're - // supposed to be running. - async fn load_ledgered_zones( - &self, - // This argument attempts to ensure that the caller holds the right - // lock. - _map: &MutexGuard<'_, ZoneMap>, - ) -> Result>, Error> { - let log = &self.inner.log; - - // NOTE: This is a function where we used to access zones by "service - // ledgers". This format has since been deprecated, and these files, - // if they exist, should not be used. - // - // We try to clean them up at this spot. Deleting this "removal" code - // in the future should be a safe operation; this is a non-load-bearing - // cleanup. - for path in self.all_service_ledgers().await { - match tokio::fs::remove_file(&path).await { - Ok(_) => (), - Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => (), - Err(e) => { - warn!( - log, - "Failed to delete old service ledger"; - "err" => ?e, - "path" => ?path, - ); - } - } - } - - // Try to load the current software's zone ledger - let ledger_paths = self.all_omicron_zone_ledgers().await; - info!(log, "Loading Omicron zones from: {ledger_paths:?}"); - let maybe_ledger = - Ledger::::new(log, ledger_paths.clone()) - .await; - - let Some(ledger) = maybe_ledger else { - info!(log, "Loading Omicron zones - No zones detected"); - return Ok(None); - }; - - info!( - log, - "Loaded Omicron zones"; - "zones_config" => ?ledger.data() - ); - Ok(Some(ledger)) - } - - // TODO(https://github.com/oxidecomputer/omicron/issues/2973): - // - // The sled agent retries this function indefinitely at the call-site, but - // we could be smarter. - // - // - If we know that disks are missing, we could wait for them - // - We could permanently fail if we are able to distinguish other errors - // more clearly. - pub async fn load_services(&self) -> Result { - let log = &self.inner.log; - let mut existing_zones = self.inner.zones.lock().await; - let Some(mut ledger) = - self.load_ledgered_zones(&existing_zones).await? - else { - // Nothing found -- nothing to do. - info!( - log, - "Loading Omicron zones - \ - no zones nor old-format services found" - ); - return Ok(LoadServicesResult::NoServicesToLoad); - }; - - let zones_config = ledger.data_mut(); - info!( - log, - "Loaded Omicron zones"; - "zones_config" => ?zones_config - ); - let omicron_zones_config = - zones_config.clone().to_omicron_zones_config(); - - self.ensure_all_omicron_zones( - &mut existing_zones, - omicron_zones_config, - ) - .await?; - Ok(LoadServicesResult::ServicesLoaded) - } - /// Sets up "Sled Agent" information, including underlay info. /// /// Any subsequent calls after the first invocation return an error. @@ -1706,12 +1338,11 @@ impl ServiceManager { // dataset into the zone. 
Additionally, construct a "unique enough" name // so we can create multiple zones of this type without collision. let unique_name = match &request { - ZoneArgs::Omicron(zone_config) => Some(zone_config.zone.id), + ZoneArgs::Omicron(zone_config) => Some(zone_config.id), ZoneArgs::Switch(_) => None, }; let datasets: Vec<_> = match &request { ZoneArgs::Omicron(zone_config) => zone_config - .zone .dataset_name() .map(|n| zone::Dataset { name: n.full_name() }) .into_iter() @@ -1731,25 +1362,17 @@ impl ServiceManager { // are falling back to searching `/opt/oxide` in addition to the install // datasets. let image_source = match &request { - ZoneArgs::Omicron(zone_config) => &zone_config.zone.image_source, + ZoneArgs::Omicron(zone_config) => &zone_config.image_source, ZoneArgs::Switch(_) => &OmicronZoneImageSource::InstallDataset, }; - let all_disks = self.inner.storage.get_latest_disks().await; - let zpools = ZoneImageZpools { - root: &all_disks.mount_config().root, - all_m2_zpools: all_disks.all_m2_zpools(), - }; - let boot_zpool = - all_disks.boot_disk().map(|(_, boot_zpool)| boot_zpool); let file_source = self.inner.zone_image_resolver.file_source_for( image_source, - &zpools, - boot_zpool.as_ref(), + self.inner.internal_disks_rx.current(), ); let zone_type_str = match &request { ZoneArgs::Omicron(zone_config) => { - zone_config.zone.zone_type.kind().zone_prefix() + zone_config.zone_type.kind().zone_prefix() } ZoneArgs::Switch(_) => "switch", }; @@ -1800,12 +1423,8 @@ impl ServiceManager { .add_instance(ServiceInstanceBuilder::new("default")); let running_zone = match &request { - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: OmicronZoneType::Clickhouse { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::Clickhouse { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1889,13 +1508,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::ClickhouseServer { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::ClickhouseServer { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1979,13 +1593,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::ClickhouseKeeper { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::ClickhouseKeeper { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2062,13 +1671,9 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - id: zone_id, - zone_type: OmicronZoneType::CockroachDb { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + id: zone_id, + zone_type: OmicronZoneType::CockroachDb { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2133,13 +1738,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::Crucible { address, dataset }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::Crucible { address, dataset }, .. 
}) => { let Some(info) = self.inner.sled_info.get() else { @@ -2191,12 +1791,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: OmicronZoneType::CruciblePantry { address }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::CruciblePantry { address }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2238,13 +1834,9 @@ impl ServiceManager { .map_err(|err| Error::io("crucible pantry profile", err))?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - id, - zone_type: OmicronZoneType::Oximeter { address }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + id, + zone_type: OmicronZoneType::Oximeter { address }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2279,16 +1871,12 @@ impl ServiceManager { })?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::ExternalDns { - http_address, - dns_address, - nic, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::ExternalDns { + http_address, + dns_address, + nic, .. }, .. @@ -2342,17 +1930,13 @@ impl ServiceManager { })?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::BoundaryNtp { - address, - dns_servers, - ntp_servers, - domain, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::BoundaryNtp { + address, + dns_servers, + ntp_servers, + domain, .. }, .. @@ -2430,12 +2014,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: OmicronZoneType::InternalNtp { address }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::InternalNtp { address }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2491,17 +2071,13 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::InternalDns { - http_address, - dns_address, - gz_address, - gz_address_index, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::InternalDns { + http_address, + dns_address, + gz_address, + gz_address_index, .. }, .. @@ -2586,19 +2162,15 @@ impl ServiceManager { })?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::Nexus { - internal_address, - external_tls, - external_dns_servers, - .. - }, - id, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::Nexus { + internal_address, + external_tls, + external_dns_servers, .. }, + id, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -3478,55 +3050,24 @@ impl ServiceManager { Ok(running_zone) } - // Ensures that a single Omicron zone is running. + // Attempt to start a single Omicron zone. // // This method is NOT idempotent. // - // - If the zone already exists, in any form, it is fully removed - // before being initialized. This is primarily intended to remove "partially - // stopped/started" zones with detritus from interfering with a new zone - // being launched. 
- // - If zones need time to be synchronized before they are initialized - // (e.g., this is a hard requirement for CockroachDb) they can check the - // `time_is_synchronized` argument. - // - `all_u2_pools` provides a snapshot into durable storage on this sled, - // which gives the storage manager an opportunity to validate the zone's - // storage configuration against the reality of the current sled. - async fn start_omicron_zone( + // Callers must do any "pre-zone-start" validation, including: + // + // * No other zone of this same name is still running + // * Time is synchronized, if the zone requires it + // * Any datasets the zone depends on exist and have been configured and/or + // mounted appropriately + pub(crate) async fn start_omicron_zone( &self, - mount_config: &MountConfig, zone: &OmicronZoneConfig, - time_is_synchronized: bool, - all_u2_pools: &Vec, - ) -> Result { - // Ensure the zone has been fully removed before we try to boot it. - // - // This ensures that old "partially booted/stopped" zones do not - // interfere with our installation. - self.ensure_removed(&zone).await?; - - // If this zone requires timesync and we aren't ready, fail it early. - if zone.zone_type.requires_timesync() && !time_is_synchronized { - return Err(Error::TimeNotSynchronized); - } - - // Ensure that this zone's storage is ready. - let zone_root_path = self - .validate_storage_and_pick_mountpoint( - mount_config, - &zone, - &all_u2_pools, - ) - .await?; - - let config = OmicronZoneConfigLocal { - zone: zone.clone(), - root: zone_root_path.path.clone(), - }; - + zone_root_path: PathInPool, + ) -> Result { let runtime = self .initialize_zone( - ZoneArgs::Omicron(&config), + ZoneArgs::Omicron(zone), zone_root_path, // filesystems= &[], @@ -3535,698 +3076,82 @@ impl ServiceManager { ) .await?; - Ok(OmicronZone { runtime, config }) + Ok(runtime) } - // Concurrently attempts to start all zones identified by requests. - // - // This method is NOT idempotent. - // - // If we try to start ANY zones concurrently, the result is contained - // in the `StartZonesResult` value. This will contain the set of zones which - // were initialized successfully, as well as the set of zones which failed - // to start. - async fn start_omicron_zones( - &self, - mount_config: &MountConfig, - requests: impl Iterator + Clone, - time_is_synchronized: bool, - all_u2_pools: &Vec, - ) -> Result { - if let Some(name) = - requests.clone().map(|zone| zone.zone_name()).duplicates().next() - { - return Err(Error::BadServiceRequest { - service: name, - message: "Should not initialize zone twice".to_string(), - }); - } - - let futures = requests.map(|zone| async move { - self.start_omicron_zone( - mount_config, - &zone, - time_is_synchronized, - all_u2_pools, - ) - .await - .map_err(|err| (zone.zone_name(), err)) - }); - - let results = futures::future::join_all(futures).await; - - let mut new_zones = Vec::new(); - let mut errors = Vec::new(); - for result in results { - match result { - Ok(zone) => { - info!(self.inner.log, "Zone started"; "zone" => zone.name()); - new_zones.push(zone); - } - Err((name, error)) => { - warn!(self.inner.log, "Zone failed to start"; "zone" => &name); - errors.push((name, error)) - } - } + /// Adjust the system boot time to the latest boot time of all zones. + fn boottime_rewrite(&self) { + // Call out to the 'tmpx' utility program which will rewrite the wtmpx + // and utmpx databases in every zone, including the global zone, to + // reflect the adjusted system boot time. 
+ let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&["/usr/platform/oxide/bin/tmpx", "-Z"]); + if let Err(e) = execute(cmd) { + warn!(self.inner.log, "Updating [wu]tmpx databases failed: {}", e); } - Ok(StartZonesResult { new_zones, errors }) - } - - /// Returns the current Omicron zone configuration - pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { - let log = &self.inner.log; - - // We need to take the lock in order for the information in the ledger - // to be up-to-date. - let _existing_zones = self.inner.zones.lock().await; - - // Read the existing set of services from the ledger. - let zone_ledger_paths = self.all_omicron_zone_ledgers().await; - let ledger_data = match Ledger::::new( - log, - zone_ledger_paths.clone(), - ) - .await - { - Some(ledger) => ledger.data().clone(), - None => OmicronZonesConfigLocal::initial(), - }; - - ledger_data.to_omicron_zones_config() } - /// Ensures that particular Omicron zones are running + /// Check if the synchronization state of the sled has shifted to true and + /// if so, execute the any out-of-band actions that need to be taken. /// - /// These services will be instantiated by this function, and will be - /// recorded to a local file to ensure they start automatically on next - /// boot. - pub async fn ensure_all_omicron_zones_persistent( - &self, - mut request: OmicronZonesConfig, - ) -> Result<(), Error> { - let log = &self.inner.log; - - let mut existing_zones = self.inner.zones.lock().await; - - // Ensure that any zone images from the artifact store are present. - for zone in &request.zones { - if let Some(hash) = zone.image_source.artifact_hash() { - if let Err(err) = ArtifactStore::get_from_storage( - &self.inner.storage, - &self.inner.log, - hash, - ) - .await - { - return Err(Error::ZoneArtifactNotFound { - hash, - zone_kind: zone.zone_type.kind().report_str(), - id: zone.id, - err, - }); - } - } - } - - // Read the existing set of services from the ledger. - let zone_ledger_paths = self.all_omicron_zone_ledgers().await; - let mut ledger = match Ledger::::new( - log, - zone_ledger_paths.clone(), - ) - .await + /// This function only executes the out-of-band actions once, once the + /// synchronization state has shifted to true. + pub(crate) async fn on_time_sync(&self) { + if self + .inner + .time_synced + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() { - Some(ledger) => ledger, - None => Ledger::::new_with( - log, - zone_ledger_paths.clone(), - OmicronZonesConfigLocal::initial(), - ), - }; - - let ledger_zone_config = ledger.data_mut(); - debug!(log, "ensure_all_omicron_zones_persistent"; - "request_generation" => request.generation.to_string(), - "ledger_generation" => - ledger_zone_config.omicron_generation.to_string(), - ); - - // Absolutely refuse to downgrade the configuration. - if ledger_zone_config.omicron_generation > request.generation { - return Err(Error::RequestedZoneConfigOutdated { - requested: request.generation, - current: ledger_zone_config.omicron_generation, - }); - } + debug!(self.inner.log, "Time is now synchronized"); + // We only want to rewrite the boot time once, so we do it here + // when we know the time is synchronized. + self.boottime_rewrite(); - // If the generation is the same as what we're running, but the contents - // aren't, that's a problem, too. 
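
The on_time_sync gating above relies on AtomicBool::compare_exchange to run its one-time actions exactly once. A tiny standalone version of that idiom:

use std::sync::atomic::{AtomicBool, Ordering};

struct TimeSyncState {
    time_synced: AtomicBool,
}

impl TimeSyncState {
    fn on_time_sync(&self) -> bool {
        // Returns true only for the first caller that flips false -> true;
        // every later call sees an Err and skips the one-time work.
        self.time_synced
            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_ok()
    }
}

fn main() {
    let state = TimeSyncState { time_synced: AtomicBool::new(false) };
    assert!(state.on_time_sync()); // first notification does the work
    assert!(!state.on_time_sync()); // subsequent notifications are no-ops
}
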
- if ledger_zone_config.omicron_generation == request.generation { - // Nexus should send us consistent zone orderings; however, we may - // reorder the zone list inside `ensure_all_omicron_zones`. To avoid - // equality checks failing only because the two lists are ordered - // differently, sort them both here before comparing. - let mut ledger_zones = - ledger_zone_config.clone().to_omicron_zones_config().zones; - - // We sort by ID because we assume no two zones have the same ID. If - // that assumption is wrong, we may return an error here where the - // conflict is soley the list orders, but in such a case that's the - // least of our problems. - ledger_zones.sort_by_key(|z| z.id); - request.zones.sort_by_key(|z| z.id); - - if ledger_zones != request.zones { - return Err(Error::RequestedConfigConflicts( - request.generation, - )); + // We expect to have a metrics queue by this point, so + // we can safely send a message on it to say the sled has + // been synchronized. + // + // We may want to retry or ensure this notification happens. See + // https://github.com/oxidecomputer/omicron/issues/8022. + let queue = self.metrics_queue(); + match queue.notify_time_synced_sled(self.sled_id()) { + Ok(_) => debug!( + self.inner.log, + "Notified metrics task that time is now synced", + ), + Err(e) => error!( + self.inner.log, + "Failed to notify metrics task that \ + time is now synced, metrics may not be produced."; + "error" => InlineErrorChain::new(&e), + ), } + } else { + debug!(self.inner.log, "Time was already synchronized"); } - - let omicron_generation = request.generation; - let ledger_generation = ledger_zone_config.ledger_generation; - self.ensure_all_omicron_zones(&mut existing_zones, request).await?; - let zones = existing_zones - .values() - .map(|omicron_zone| omicron_zone.config.clone()) - .collect(); - - let new_config = OmicronZonesConfigLocal { - omicron_generation, - ledger_generation, - zones, - }; - - // If the contents of the ledger would be identical, we can avoid - // performing an update and commit. - if *ledger_zone_config == new_config { - return Ok(()); - } - - // Update the zones in the ledger and write it back to both M.2s - *ledger_zone_config = new_config; - ledger.commit().await?; - - Ok(()) } - // Ensures that only the following Omicron zones are running. - // - // This method strives to be idempotent. - // - // - Starting and stopping zones is not an atomic operation - it's possible - // that we cannot start a zone after a previous one has been successfully - // created (or destroyed) intentionally. As a result, even in error cases, - // it's possible that the set of `existing_zones` changes. However, this set - // will only change in the direction of `new_request`: zones will only be - // removed if they ARE NOT part of `new_request`, and zones will only be - // added if they ARE part of `new_request`. - // - Zones are generally not updated in-place (i.e., two zone configurations - // that differ in any way are treated as entirely distinct), with an - // exception for backfilling the `filesystem_pool`, as long as the new - // request's filesystem pool matches the actual pool for that zones. This - // in-place update is allowed because changing only that property to match - // the runtime system does not require reconfiguring the zone or shutting it - // down and restarting it. - // - This method does not record any information such that these services - // are re-instantiated on boot. 
- async fn ensure_all_omicron_zones( + /// Ensures that a switch zone exists with the provided IP adddress. + pub async fn activate_switch( &self, - // The MutexGuard here attempts to ensure that the caller has the right - // lock held when calling this function. - existing_zones: &mut MutexGuard<'_, ZoneMap>, - new_request: OmicronZonesConfig, + // If we're reconfiguring the switch zone with an underlay address, we + // also need the rack network config to set tfport uplinks. + underlay_info: Option<(Ipv6Addr, Option<&RackNetworkConfig>)>, + baseboard: Baseboard, ) -> Result<(), Error> { - // Do some data-normalization to ensure we can compare the "requested - // set" vs the "existing set" as HashSets. - let ReconciledNewZonesRequest { - zones_to_be_removed, - zones_to_be_added, - } = reconcile_running_zones_with_new_request( - existing_zones, - new_request, - &self.inner.log, - )?; - - // Destroy zones that should not be running - for zone in zones_to_be_removed { - self.zone_bundle_and_try_remove(existing_zones, &zone).await; - } + info!(self.inner.log, "Ensuring scrimlet services (enabling services)"); + let mut filesystems: Vec = vec![]; + let mut data_links: Vec = vec![]; - // Collect information that's necessary to start new zones - let storage = self.inner.storage.get_latest_disks().await; - let mount_config = storage.mount_config(); - let all_u2_pools = storage.all_u2_zpools(); - let time_is_synchronized = - match self.timesync_get_locked(&existing_zones).await { - // Time is synchronized - Ok(TimeSync { sync: true, .. }) => true, - // Time is not synchronized, or we can't check - _ => false, - }; - - // Concurrently boot all new zones - let StartZonesResult { new_zones, errors } = self - .start_omicron_zones( - mount_config, - zones_to_be_added.iter(), - time_is_synchronized, - &all_u2_pools, - ) - .await?; - - // Add the new zones to our tracked zone set - existing_zones.extend( - new_zones.into_iter().map(|zone| (zone.name().to_string(), zone)), - ); - - // If any zones failed to start, exit with an error - if !errors.is_empty() { - return Err(Error::ZoneEnsure { errors }); - } - Ok(()) - } - - // Attempts to take a zone bundle and remove a zone. - // - // Logs, but does not return an error on failure. - async fn zone_bundle_and_try_remove( - &self, - existing_zones: &mut MutexGuard<'_, ZoneMap>, - zone: &OmicronZoneConfig, - ) { - let log = &self.inner.log; - let expected_zone_name = zone.zone_name(); - let Some(mut zone) = existing_zones.remove(&expected_zone_name) else { - warn!( - log, - "Expected to remove zone, but could not find it"; - "zone_name" => &expected_zone_name, - ); - return; - }; - // Ensure that the sled agent's metrics task is not tracking the zone's - // VNICs or OPTE ports. 
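The deleted `ensure_all_omicron_zones` reduced a new request to two sets, zones to shut down and zones to start, keyed on zone ID and full config equality. A rough sketch of that reconciliation shape, ignoring the `filesystem_pool` special case handled later in this diff:

    use std::collections::{BTreeMap, BTreeSet};

    #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
    struct ZoneConfig {
        id: u32,
        image: &'static str,
    }

    fn reconcile(
        running: &[ZoneConfig],
        requested: &[ZoneConfig],
    ) -> (BTreeSet<ZoneConfig>, BTreeSet<ZoneConfig>) {
        let mut running_by_id: BTreeMap<u32, &ZoneConfig> =
            running.iter().map(|z| (z.id, z)).collect();
        let mut to_add = BTreeSet::new();
        let mut to_remove = BTreeSet::new();

        for zone in requested {
            match running_by_id.remove(&zone.id) {
                // Unchanged config: nothing to do.
                Some(existing) if existing == zone => {}
                // Changed config: restart (remove the old, add the new).
                Some(existing) => {
                    to_remove.insert(existing.clone());
                    to_add.insert(zone.clone());
                }
                // Not running yet: add it.
                None => {
                    to_add.insert(zone.clone());
                }
            }
        }
        // Anything still left in the map was not requested at all: remove it.
        to_remove.extend(running_by_id.into_values().cloned());
        (to_remove, to_add)
    }

    fn main() {
        let running = vec![
            ZoneConfig { id: 1, image: "ntp-v1" },
            ZoneConfig { id: 2, image: "dns-v1" },
        ];
        let requested = vec![
            ZoneConfig { id: 1, image: "ntp-v2" },      // changed -> restart
            ZoneConfig { id: 3, image: "oximeter-v1" }, // new -> add
        ];
        let (remove, add) = reconcile(&running, &requested);
        assert_eq!(remove.len(), 2); // old ntp + unrequested dns
        assert_eq!(add.len(), 2);    // new ntp + oximeter
    }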
- if let Some(queue) = self.maybe_metrics_queue() { - match queue.untrack_zone_links(&zone.runtime) { - Ok(_) => debug!( - log, - "stopped tracking zone datalinks"; - "zone_name" => &expected_zone_name, - ), - Err(errors) => error!( - log, - "failed to stop tracking zone datalinks"; - "errors" => ?errors, - "zone_name" => &expected_zone_name - ), - } - } - debug!( - log, - "removing an existing zone"; - "zone_name" => &expected_zone_name, - ); - if let Err(e) = self - .inner - .zone_bundler - .create(&zone.runtime, ZoneBundleCause::UnexpectedZone) - .await - { - error!( - log, - "Failed to take bundle of unexpected zone"; - "zone_name" => &expected_zone_name, - InlineErrorChain::new(&e), - ); - } - if let Err(e) = zone.runtime.stop().await { - error!(log, "Failed to stop zone {}: {e}", zone.name()); - } - if let Err(e) = - self.clean_up_after_zone_shutdown(&zone.config.zone).await - { - error!( - log, - "Failed to clean up after stopping zone {}", zone.name(); - InlineErrorChain::new(&e), - ); - } - } - - // Ensures that if a zone is about to be installed, it does not exist. - async fn ensure_removed( - &self, - zone_config: &OmicronZoneConfig, - ) -> Result<(), Error> { - let zone_name = zone_config.zone_name(); - match self.inner.system_api.zones().find(&zone_name).await { - Ok(Some(zone)) => { - warn!( - self.inner.log, - "removing zone"; - "zone" => &zone_name, - "state" => ?zone.state(), - ); - // NOTE: We might want to tell the sled-agent's metrics task to - // stop tracking any links in this zone. However, we don't have - // very easy access to them, without running a command in the - // zone. These links are about to be deleted, and the metrics - // task will expire them after a while anyway, but it might be - // worth the trouble to do that in the future. - if let Err(e) = self - .inner - .system_api - .zones() - .halt_and_remove_logged(&self.inner.log, &zone_name) - .await - { - error!( - self.inner.log, - "Failed to remove zone"; - "zone" => &zone_name, - InlineErrorChain::new(&e), - ); - return Err(Error::ZoneRemoval { - zone_name: zone_name.to_string(), - err: e, - }); - } - if let Err(e) = - self.clean_up_after_zone_shutdown(zone_config).await - { - error!( - self.inner.log, - "Failed to clean up after removing zone"; - "zone" => &zone_name, - InlineErrorChain::new(&e), - ); - return Err(e); - } - Ok(()) - } - Ok(None) => Ok(()), - Err(err) => Err(Error::ZoneList(err)), - } - } - - // Perform any outside-the-zone cleanup required after shutting down a zone. - async fn clean_up_after_zone_shutdown( - &self, - zone: &OmicronZoneConfig, - ) -> Result<(), Error> { - // Special teardown for internal DNS zones: delete the global zone - // address we created for it, and tell DDM to stop advertising the - // prefix of that address. - if let OmicronZoneType::InternalDns { - gz_address, - gz_address_index, - .. - } = &zone.zone_type - { - let addrobj = AddrObject::new( - &self.inner.underlay_vnic.0, - &internal_dns_addrobj_name(*gz_address_index), - ) - .expect("internal DNS address object name is well-formed"); - Zones::delete_address(None, &addrobj).await.map_err(|err| { - Error::ZoneCleanup { - zone_name: zone.zone_name(), - err: Box::new(err), - } - })?; - - self.ddm_reconciler() - .remove_internal_dns_subnet(Ipv6Subnet::new(*gz_address)); - } - - Ok(()) - } - - // Returns a zone filesystem mountpoint, after ensuring that U.2 storage - // is valid. 
- async fn validate_storage_and_pick_mountpoint( - &self, - mount_config: &MountConfig, - zone: &OmicronZoneConfig, - all_u2_pools: &Vec, - ) -> Result { - let name = zone.zone_name(); - - // If the caller has requested a specific durable dataset, - // ensure that it is encrypted and that it exists. - // - // Typically, the transient filesystem pool will be placed on the same - // zpool as the durable dataset (to reduce the fault domain), but that - // decision belongs to Nexus, and is not enforced here. - if let Some(dataset) = zone.dataset_name() { - // Check that the dataset is actually ready to be used. - let [zoned, canmount, encryption] = - illumos_utils::zfs::Zfs::get_values( - &dataset.full_name(), - &["zoned", "canmount", "encryption"], - None, - ) - .await - .map_err(|err| Error::GetZfsValue { - zone: zone.zone_name(), - source: err, - })?; - - let check_property = |name, actual, expected| { - if actual != expected { - return Err(Error::DatasetNotReady { - zone: zone.zone_name(), - dataset: dataset.full_name(), - prop_name: String::from(name), - prop_value: actual, - prop_value_expected: String::from(expected), - }); - } - return Ok(()); - }; - check_property("zoned", zoned, "on")?; - check_property("canmount", canmount, "on")?; - if dataset.kind().dataset_should_be_encrypted() { - check_property("encryption", encryption, "aes-256-gcm")?; - } - - let data_pool = dataset.pool(); - if !all_u2_pools.contains(&data_pool) { - warn!( - self.inner.log, - "zone dataset requested on a zpool which doesn't exist"; - "zone" => &name, - "zpool" => %data_pool - ); - return Err(Error::MissingDevice { - device: format!("zpool: {data_pool}"), - }); - } - } - - let filesystem_pool = match (&zone.filesystem_pool, zone.dataset_name()) - { - // If a pool was explicitly requested, use it. - (Some(pool), _) => *pool, - // NOTE: The following cases are for backwards compatibility. - // - // If no pool was selected, prefer to use the same pool as the - // durable dataset. Otherwise, pick one randomly. - (None, Some(dataset)) => *dataset.pool(), - (None, None) => *all_u2_pools - .choose(&mut rand::thread_rng()) - .ok_or_else(|| Error::U2NotFound)?, - }; - - if !all_u2_pools.contains(&filesystem_pool) { - warn!( - self.inner.log, - "zone filesystem dataset requested on a zpool which doesn't exist"; - "zone" => &name, - "zpool" => %filesystem_pool - ); - return Err(Error::MissingDevice { - device: format!("zpool: {filesystem_pool}"), - }); - } - let path = filesystem_pool - .dataset_mountpoint(&mount_config.root, ZONE_DATASET); - let pool = ZpoolOrRamdisk::Zpool(filesystem_pool); - Ok(PathInPool { pool, path }) - } - - /// Adjust the system boot time to the latest boot time of all zones. - fn boottime_rewrite(&self) { - // Call out to the 'tmpx' utility program which will rewrite the wtmpx - // and utmpx databases in every zone, including the global zone, to - // reflect the adjusted system boot time. 
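`validate_storage_and_pick_mountpoint`, removed above, chose the transient filesystem pool with a clear precedence: an explicitly requested pool, else the durable dataset's pool, else any available U.2 pool, and it always verified that the chosen pool actually exists on the sled. A sketch of that precedence using plain strings instead of `ZpoolName`, and taking the first available pool where the real code picked one at random:

    fn pick_filesystem_pool<'a>(
        requested: Option<&'a str>,
        durable_dataset_pool: Option<&'a str>,
        all_u2_pools: &'a [&'a str],
    ) -> Option<&'a str> {
        let pool = match (requested, durable_dataset_pool) {
            // An explicit request always wins.
            (Some(pool), _) => pool,
            // Otherwise co-locate with the durable dataset to shrink the
            // fault domain.
            (None, Some(pool)) => pool,
            // Otherwise any U.2 pool will do.
            (None, None) => *all_u2_pools.first()?,
        };
        // Whatever we picked must actually exist on this sled.
        all_u2_pools.contains(&pool).then_some(pool)
    }

    fn main() {
        let pools = ["oxp_a", "oxp_b"];
        assert_eq!(pick_filesystem_pool(Some("oxp_b"), None, &pools), Some("oxp_b"));
        assert_eq!(pick_filesystem_pool(None, Some("oxp_a"), &pools), Some("oxp_a"));
        assert_eq!(pick_filesystem_pool(None, None, &pools), Some("oxp_a"));
        // A pool that is not present is rejected.
        assert_eq!(pick_filesystem_pool(Some("oxp_missing"), None, &pools), None);
    }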
- let mut command = std::process::Command::new(PFEXEC); - let cmd = command.args(&["/usr/platform/oxide/bin/tmpx", "-Z"]); - if let Err(e) = execute(cmd) { - warn!(self.inner.log, "Updating [wu]tmpx databases failed: {}", e); - } - } - - pub async fn timesync_get(&self) -> Result { - let existing_zones = self.inner.zones.lock().await; - self.timesync_get_locked(&existing_zones).await - } - - async fn timesync_get_locked( - &self, - existing_zones: &tokio::sync::MutexGuard<'_, ZoneMap>, - ) -> Result { - let skip_timesync = match &self.inner.time_sync_config { - TimeSyncConfig::Normal => false, - TimeSyncConfig::Skip => true, - #[cfg(all(test, target_os = "illumos"))] - TimeSyncConfig::Fail => { - info!(self.inner.log, "Configured to fail timesync checks"); - return Err(Error::TimeNotSynchronized); - } - }; - - if skip_timesync { - info!(self.inner.log, "Configured to skip timesync checks"); - self.on_time_sync().await; - return Ok(TimeSync { - sync: true, - ref_id: 0, - ip_addr: IPV6_UNSPECIFIED, - stratum: 0, - ref_time: 0.0, - correction: 0.00, - }); - }; - - let ntp_zone_name = - InstalledZone::get_zone_name(ZoneKind::NTP_PREFIX, None); - - let ntp_zone = existing_zones - .iter() - .find(|(name, _)| name.starts_with(&ntp_zone_name)) - .ok_or_else(|| Error::NtpZoneNotReady)? - .1; - - // XXXNTP - This could be replaced with a direct connection to the - // daemon using a patched version of the chrony_candm crate to allow - // a custom server socket path. From the GZ, it should be possible to - // connect to the UNIX socket at - // format!("{}/var/run/chrony/chronyd.sock", ntp_zone.root()) - - match ntp_zone.runtime.run_cmd(&["/usr/bin/chronyc", "-c", "tracking"]) - { - Ok(stdout) => { - let v: Vec<&str> = stdout.split(',').collect(); - - if v.len() > 9 { - let ref_id = u32::from_str_radix(v[0], 16) - .map_err(|_| Error::NtpZoneNotReady)?; - let ip_addr = - IpAddr::from_str(v[1]).unwrap_or(IPV6_UNSPECIFIED); - let stratum = u8::from_str(v[2]) - .map_err(|_| Error::NtpZoneNotReady)?; - let ref_time = f64::from_str(v[3]) - .map_err(|_| Error::NtpZoneNotReady)?; - let correction = f64::from_str(v[4]) - .map_err(|_| Error::NtpZoneNotReady)?; - - // Per `chronyc waitsync`'s implementation, if either the - // reference IP address is not unspecified or the reference - // ID is not 0 or 0x7f7f0101, we are synchronized to a peer. - let peer_sync = !ip_addr.is_unspecified() - || (ref_id != 0 && ref_id != 0x7f7f0101); - - let sync = stratum < 10 - && ref_time > 1234567890.0 - && peer_sync - && correction.abs() <= 0.05; - - if sync { - self.on_time_sync().await; - } - - Ok(TimeSync { - sync, - ref_id, - ip_addr, - stratum, - ref_time, - correction, - }) - } else { - Err(Error::NtpZoneNotReady) - } - } - Err(e) => { - error!(self.inner.log, "chronyc command failed: {}", e); - Err(Error::NtpZoneNotReady) - } - } - } - - /// Check if the synchronization state of the sled has shifted to true and - /// if so, execute the any out-of-band actions that need to be taken. - /// - /// This function only executes the out-of-band actions once, once the - /// synchronization state has shifted to true. - async fn on_time_sync(&self) { - if self - .inner - .time_synced - .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) - .is_ok() - { - debug!(self.inner.log, "Time is now synchronized"); - // We only want to rewrite the boot time once, so we do it here - // when we know the time is synchronized. 
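The removed `timesync_get` derived its sync decision from `chronyc -c tracking` output: a comma-separated line whose reference ID, reference IP, stratum, reference time, and correction fields feed the thresholds shown above. A sketch of roughly that interpretation; the tracking line in `main` is made up for illustration.

    use std::net::IpAddr;
    use std::str::FromStr;

    fn is_synced(csv: &str) -> Option<bool> {
        let v: Vec<&str> = csv.split(',').collect();
        if v.len() <= 9 {
            return None;
        }
        let ref_id = u32::from_str_radix(v[0], 16).ok()?;
        let ip_addr = IpAddr::from_str(v[1]).ok();
        let stratum = u8::from_str(v[2]).ok()?;
        let ref_time = f64::from_str(v[3]).ok()?;
        let correction = f64::from_str(v[4]).ok()?;

        // Synchronized to a peer if the reference address is set, or the
        // reference ID is neither 0 nor the local 127.127.1.1 refclock.
        let peer_sync = ip_addr.map_or(false, |ip| !ip.is_unspecified())
            || (ref_id != 0 && ref_id != 0x7f7f0101);

        Some(
            stratum < 10
                && ref_time > 1234567890.0
                && peer_sync
                && correction.abs() <= 0.05,
        )
    }

    fn main() {
        // Hypothetical tracking line: ref id, ref ip, stratum, ref time,
        // correction, then five more fields that are ignored here.
        let line = "A29FC87B,fd00:1122:3344::10,3,1700000000.0,0.001,0,0,0,0,0";
        assert_eq!(is_synced(line), Some(true));
    }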
- self.boottime_rewrite(); - - // We expect to have a metrics queue by this point, so - // we can safely send a message on it to say the sled has - // been synchronized. - // - // We may want to retry or ensure this notification happens. See - // https://github.com/oxidecomputer/omicron/issues/8022. - let queue = self.metrics_queue(); - match queue.notify_time_synced_sled(self.sled_id()) { - Ok(_) => debug!( - self.inner.log, - "Notified metrics task that time is now synced", - ), - Err(e) => error!( - self.inner.log, - "Failed to notify metrics task that \ - time is now synced, metrics may not be produced."; - "error" => InlineErrorChain::new(&e), - ), - } - } else { - debug!(self.inner.log, "Time was already synchronized"); - } - } - - /// Ensures that a switch zone exists with the provided IP adddress. - pub async fn activate_switch( - &self, - // If we're reconfiguring the switch zone with an underlay address, we - // also need the rack network config to set tfport uplinks. - underlay_info: Option<(Ipv6Addr, Option<&RackNetworkConfig>)>, - baseboard: Baseboard, - ) -> Result<(), Error> { - info!(self.inner.log, "Ensuring scrimlet services (enabling services)"); - let mut filesystems: Vec = vec![]; - let mut data_links: Vec = vec![]; - - let services = match self.inner.sled_mode { - // A pure gimlet sled should not be trying to activate a switch - // zone. - SledMode::Gimlet => { - return Err(Error::SwitchZone(anyhow::anyhow!( - "attempted to activate switch zone on non-scrimlet sled" - ))); - } + let services = match self.inner.sled_mode { + // A pure gimlet sled should not be trying to activate a switch + // zone. + SledMode::Gimlet => { + return Err(Error::SwitchZone(anyhow::anyhow!( + "attempted to activate switch zone on non-scrimlet sled" + ))); + } // Sled is a scrimlet and the real tofino driver has been loaded. SledMode::Auto @@ -4988,973 +3913,9 @@ fn internal_dns_addrobj_name(gz_address_index: u32) -> String { format!("internaldns{gz_address_index}") } -#[derive(Debug)] -struct ReconciledNewZonesRequest { - zones_to_be_removed: HashSet, - zones_to_be_added: HashSet, -} - -fn reconcile_running_zones_with_new_request( - existing_zones: &mut MutexGuard<'_, ZoneMap>, - new_request: OmicronZonesConfig, - log: &Logger, -) -> Result { - reconcile_running_zones_with_new_request_impl( - existing_zones - .values_mut() - .map(|z| (&mut z.config.zone, z.runtime.root_zpool())), - new_request, - log, - ) -} - -// Separate helper function for `reconcile_running_zones_with_new_request` that -// allows unit tests to exercise the implementation without having to construct -// a `&mut MutexGuard<'_, ZoneMap>` for `existing_zones`. -fn reconcile_running_zones_with_new_request_impl<'a>( - existing_zones_with_runtime_zpool: impl Iterator< - Item = (&'a mut OmicronZoneConfig, &'a ZpoolOrRamdisk), - >, - new_request: OmicronZonesConfig, - log: &Logger, -) -> Result { - let mut existing_zones_by_id: BTreeMap<_, _> = - existing_zones_with_runtime_zpool - .map(|(zone, zpool)| (zone.id, (zone, zpool))) - .collect(); - let mut zones_to_be_added = HashSet::new(); - let mut zones_to_be_removed = HashSet::new(); - let mut zones_to_update = Vec::new(); - - for zone in new_request.zones.into_iter() { - let Some((existing_zone, runtime_zpool)) = - existing_zones_by_id.remove(&zone.id) - else { - // This zone isn't in the existing set; add it. - zones_to_be_added.insert(zone); - continue; - }; - - // We're already running this zone. If the config hasn't changed, we - // have nothing to do. 
- if zone == *existing_zone { - continue; - } - - // Special case for fixing #7229. We have an incoming request for a zone - // that we're already running except the config has changed; normally, - // we'd shut the zone down and restart it. However, if we get a new - // request that is: - // - // 1. setting `filesystem_pool`, and - // 2. the config for this zone is otherwise identical, and - // 3. the new `filesystem_pool` matches the pool on which the zone is - // installed - // - // then we don't want to shut the zone down and restart it, because the - // config hasn't actually changed in any meaningful way; this is just - // reconfigurator correcting #7229. - if let Some(new_filesystem_pool) = &zone.filesystem_pool { - let differs_only_by_filesystem_pool = { - // Clone `existing_zone` and mutate its `filesystem_pool` to - // match the new request; if they now match, that's the only - // field that's different. - let mut existing = existing_zone.clone(); - existing.filesystem_pool = Some(*new_filesystem_pool); - existing == zone - }; - - let runtime_zpool = match runtime_zpool { - ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, - ZpoolOrRamdisk::Ramdisk => { - // The only zone we run on the ramdisk is the switch - // zone, for which it isn't possible to get a zone - // request, so it should be fine to put an - // `unreachable!()` here. Out of caution for future - // changes, we'll instead return an error that the - // requested zone is on the ramdisk. - error!( - log, - "fix-7229: unexpectedly received request with a \ - zone config for a zone running on ramdisk"; - "new_config" => ?zone, - "existing_config" => ?existing_zone, - ); - return Err(Error::ZoneIsRunningOnRamdisk { - zone_id: zone.id, - }); - } - }; - - if differs_only_by_filesystem_pool { - if new_filesystem_pool == runtime_zpool { - // Our #7229 special case: the new config is only filling in - // the pool, and it does so correctly. Move on to the next - // zone in the request without adding this zone to either of - // our `zone_to_be_*` sets. - info!( - log, - "fix-7229: accepted new zone config that changes only \ - filesystem_pool"; - "new_config" => ?zone, - ); - - // We should update this `existing_zone`, but delay doing so - // until we've processed all zones (so if there are any - // failures later, we don't return having partially-updated - // the existing zones). - zones_to_update.push((existing_zone, zone)); - continue; - } else { - error!( - log, - "fix-7229: rejected new zone config that changes only \ - filesystem_pool (incorrect pool)"; - "new_config" => ?zone, - "expected_pool" => %runtime_zpool, - ); - return Err(Error::InvalidFilesystemPoolZoneConfig { - zone_id: zone.id, - expected_pool: *runtime_zpool, - got_pool: *new_filesystem_pool, - }); - } - } - } - - // End of #7229 special case: this zone is already running, but the new - // request has changed it in some way. We need to shut it down and - // restart it. - zones_to_be_removed.insert(existing_zone.clone()); - zones_to_be_added.insert(zone); - } - - // Any remaining entries in `existing_zones_by_id` should be shut down. - zones_to_be_removed - .extend(existing_zones_by_id.into_values().map(|(z, _)| z.clone())); - - // All zones have been handled successfully; commit any changes to existing - // zones we found in our "fix 7229" special case above. 
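The core of the #7229 special case deleted here is an equality trick: clone the running zone's config, copy the incoming `filesystem_pool` onto it, and check whether the two configs are now identical. A sketch with simplified stand-in types; the real check also verified that the pool matched the zpool the zone was actually running on.

    #[derive(Clone, Debug, PartialEq, Eq)]
    struct ZoneConfig {
        id: u32,
        filesystem_pool: Option<String>,
        image: String,
    }

    fn differs_only_by_filesystem_pool(existing: &ZoneConfig, new: &ZoneConfig) -> bool {
        let Some(new_pool) = &new.filesystem_pool else {
            return false;
        };
        // Patch only the pool onto a copy of the running config; if the
        // result equals the new request, nothing else changed.
        let mut patched = existing.clone();
        patched.filesystem_pool = Some(new_pool.clone());
        patched == *new
    }

    fn main() {
        let existing = ZoneConfig {
            id: 7,
            filesystem_pool: None,
            image: "oximeter-v1".into(),
        };
        let mut incoming = existing.clone();
        incoming.filesystem_pool = Some("oxp_a".into());
        // Only the pool was backfilled: safe to update in place.
        assert!(differs_only_by_filesystem_pool(&existing, &incoming));

        incoming.image = "oximeter-v2".into();
        // Something else changed too: restart the zone instead.
        assert!(!differs_only_by_filesystem_pool(&existing, &incoming));
    }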
- let num_zones_updated = zones_to_update.len(); - for (existing_zone, new_zone) in zones_to_update { - *existing_zone = new_zone; - } - - info!( - log, - "ensure_all_omicron_zones: request reconciliation done"; - "num_zones_to_be_removed" => zones_to_be_removed.len(), - "num_zones_to_be_added" => zones_to_be_added.len(), - "num_zones_updated" => num_zones_updated, - ); - Ok(ReconciledNewZonesRequest { zones_to_be_removed, zones_to_be_added }) -} - -#[cfg(all(test, target_os = "illumos"))] -mod illumos_tests { - use crate::metrics; - - use super::*; - use illumos_utils::dladm::{ - BOOTSTRAP_ETHERSTUB_NAME, Etherstub, UNDERLAY_ETHERSTUB_NAME, - UNDERLAY_ETHERSTUB_VNIC_NAME, - }; - - use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use omicron_uuid_kinds::OmicronZoneUuid; - use sled_agent_zone_images::ZoneImageZpools; - use sled_storage::manager_test_harness::StorageManagerTestHarness; - use std::{ - net::{Ipv6Addr, SocketAddrV6}, - time::Duration, - }; - use tokio::sync::mpsc::error::TryRecvError; - use uuid::Uuid; - - // Just placeholders. Not used. - const GLOBAL_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - - const EXPECTED_PORT: u16 = 12223; - - // Timeout within which we must have received a message about a zone's links - // to track. This is very generous. - const LINK_NOTIFICATION_TIMEOUT: Duration = Duration::from_secs(5); - - struct FakeSystemApi { - fake_install_dir: Utf8PathBuf, - dladm: Arc, - zones: Arc, - } - - impl FakeSystemApi { - fn new(fake_install_dir: Utf8PathBuf) -> Box { - Box::new(Self { - fake_install_dir, - dladm: illumos_utils::fakes::dladm::Dladm::new(), - zones: illumos_utils::fakes::zone::Zones::new(), - }) - } - } - - impl SystemApi for FakeSystemApi { - fn fake_install_dir(&self) -> Option<&Utf8Path> { - Some(&self.fake_install_dir) - } - - fn dladm(&self) -> Arc { - self.dladm.clone() - } - - fn zones(&self) -> Arc { - self.zones.clone() - } - } - - fn make_bootstrap_networking_config() -> BootstrapNetworking { - BootstrapNetworking { - bootstrap_etherstub: Etherstub( - BOOTSTRAP_ETHERSTUB_NAME.to_string(), - ), - global_zone_bootstrap_ip: GLOBAL_ZONE_BOOTSTRAP_IP, - global_zone_bootstrap_link_local_ip: GLOBAL_ZONE_BOOTSTRAP_IP, - switch_zone_bootstrap_ip: SWITCH_ZONE_BOOTSTRAP_IP, - underlay_etherstub: Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - underlay_etherstub_vnic: EtherstubVnic( - UNDERLAY_ETHERSTUB_VNIC_NAME.to_string(), - ), - } - } - - // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - ) { - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - try_new_service_of_type( - mgr, - id, - generation, - OmicronZoneType::InternalNtp { address }, - ) - .await - .expect("Could not create service"); - } - - async fn try_new_service_of_type( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - zone_type: OmicronZoneType, - ) -> Result<(), Error> { - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - zone_type, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }], - }) - .await - } - - // Prepare to call "ensure" for a service which already exists. We should - // return the service without actually installing a new zone. 
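Returning briefly to the reconciliation helper removed earlier in this hunk: in-place updates discovered during the scan were queued in `zones_to_update` and applied only after every zone passed validation, so a later error could not leave the running set partially updated. A small sketch of that collect-then-commit pattern with a simplified `Zone` type:

    #[derive(Clone, Debug, PartialEq, Eq)]
    struct Zone {
        id: u32,
        pool: Option<String>,
    }

    fn apply_backfills(existing: &mut [Zone], incoming: Vec<Zone>) -> Result<(), String> {
        let mut to_update: Vec<(&mut Zone, Zone)> = Vec::new();
        for (current, new) in existing.iter_mut().zip(incoming) {
            // Reject anything suspicious before mutating any state.
            if new.id != current.id {
                return Err(format!("zone {} changed identity", current.id));
            }
            if new.pool.is_some() && current.pool.is_none() {
                to_update.push((current, new));
            }
        }
        // Every zone validated: now commit the queued updates.
        for (current, new) in to_update {
            *current = new;
        }
        Ok(())
    }

    fn main() {
        let mut running = vec![Zone { id: 1, pool: None }, Zone { id: 2, pool: None }];
        let incoming = vec![
            Zone { id: 1, pool: Some("oxp_a".into()) },
            Zone { id: 2, pool: Some("oxp_b".into()) },
        ];
        apply_backfills(&mut running, incoming).unwrap();
        assert!(running.iter().all(|z| z.pool.is_some()));
    }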
- async fn ensure_existing_service( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - ) { - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }], - }) - .await - .unwrap(); - } - - // Prepare to drop the service manager. - // - // This will shut down all allocated zones, and delete their - // associated VNICs. - async fn drop_service_manager(mgr: ServiceManager) { - // Also send a message to the metrics task that the VNIC has been - // deleted. - let queue = mgr.metrics_queue(); - for zone in mgr.inner.zones.lock().await.values() { - if let Err(e) = queue.untrack_zone_links(&zone.runtime) { - error!( - mgr.inner.log, - "failed to stop tracking zone datalinks"; - "errors" => ?e, - ); - } - } - - // Explicitly drop the service manager - drop(mgr); - } - - struct TestConfig { - config_dir: camino_tempfile::Utf8TempDir, - } - - impl TestConfig { - async fn new() -> Self { - let config_dir = camino_tempfile::Utf8TempDir::new().unwrap(); - Self { config_dir } - } - - fn make_config(&self) -> Config { - Config { - sled_identifiers: SledIdentifiers { - rack_id: Uuid::new_v4(), - sled_id: Uuid::new_v4(), - model: "fake-gimlet".to_string(), - revision: 1, - serial: "fake-serial".to_string(), - }, - sidecar_revision: SidecarRevision::Physical( - "rev_whatever_its_a_test".to_string(), - ), - } - } - - fn override_paths(&self, mgr: &ServiceManager) { - let dir = self.config_dir.path(); - mgr.override_ledger_directory(dir.to_path_buf()); - mgr.override_image_directory(dir.to_path_buf()); - - // We test launching "fake" versions of the zones, but the - // logic to find paths relies on checking the existence of - // files. 
- std::fs::write(dir.join("oximeter.tar.gz"), "Not a real file") - .unwrap(); - std::fs::write(dir.join("ntp.tar.gz"), "Not a real file").unwrap(); - } - } - - async fn setup_storage(log: &Logger) -> StorageManagerTestHarness { - let mut harness = StorageManagerTestHarness::new(&log).await; - let raw_disks = - harness.add_vdevs(&["u2_test.vdev", "m2_test.vdev"]).await; - harness.handle().key_manager_ready().await; - let config = harness.make_config(1, &raw_disks); - let result = harness - .handle() - .omicron_physical_disks_ensure(config.clone()) - .await - .expect("Failed to ensure disks"); - assert!(!result.has_error(), "{:?}", result); - harness - } - - struct LedgerTestHelper<'a> { - log: slog::Logger, - storage_test_harness: StorageManagerTestHarness, - zone_bundler: ZoneBundler, - zone_image_resolver: ZoneImageSourceResolver, - test_config: &'a TestConfig, - } - - impl<'a> LedgerTestHelper<'a> { - async fn new(log: slog::Logger, test_config: &'a TestConfig) -> Self { - let storage_test_harness = setup_storage(&log).await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_test_harness.handle().clone(), - Default::default(), - ) - .await; - - let mut storage_manager = storage_test_harness.handle().clone(); - let all_disks = storage_manager.get_latest_disks().await; - let (_, boot_zpool) = storage_manager.wait_for_boot_disk().await; - let zpools = ZoneImageZpools { - root: &all_disks.mount_config().root, - all_m2_zpools: all_disks.all_m2_zpools(), - }; - let zone_image_resolver = - ZoneImageSourceResolver::new(&log, &zpools, &boot_zpool); - - LedgerTestHelper { - log, - storage_test_harness, - zone_bundler, - zone_image_resolver, - test_config, - } - } - - async fn cleanup(&mut self) { - self.storage_test_harness.cleanup().await; - } - - fn new_service_manager( - &self, - system: Box, - ) -> ServiceManager { - self.new_service_manager_with_timesync(TimeSyncConfig::Skip, system) - } - - fn new_service_manager_with_timesync( - &self, - time_sync_config: TimeSyncConfig, - system: Box, - ) -> ServiceManager { - let log = &self.log; - let reconciler = - DdmReconciler::new(Ipv6Subnet::new(Ipv6Addr::LOCALHOST), log) - .expect("created DdmReconciler"); - let mgr = ServiceManager::new_inner( - log, - reconciler, - make_bootstrap_networking_config(), - SledMode::Auto, - time_sync_config, - SidecarRevision::Physical("rev-test".to_string()), - vec![], - self.storage_test_harness.handle().clone(), - self.zone_bundler.clone(), - self.zone_image_resolver.clone(), - system, - ); - self.test_config.override_paths(&mgr); - mgr - } - - async fn sled_agent_started( - log: &slog::Logger, - test_config: &TestConfig, - mgr: &ServiceManager, - metrics_queue: MetricsRequestQueue, - ) { - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new( - 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - ), - ); - - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - metrics_queue, - ) - .await - .unwrap(); - } - } - - #[tokio::test] - async fn test_ensure_service() { - let logctx = - omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - 
&logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v1 = Generation::new(); - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // First check that we received the synced sled notification - let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!( - synced_message, - metrics::Message::TimeSynced { sled_id: mgr.sled_id() }, - ); - - // Then, check that we received a message about the zone's VNIC. - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - let zone_name = format!("oxz_ntp_{}", id); - assert_eq!( - vnic_message, - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into() - }, - ); - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_ensure_service_before_timesync() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_ensure_service_before_timesync", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - let mgr = helper.new_service_manager_with_timesync( - TimeSyncConfig::Fail, - FakeSystemApi::new(test_config.config_dir.path().to_path_buf()), - ); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v1 = Generation::new(); - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - - // Should fail: time has not yet synchronized. - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - let result = try_new_service_of_type( - &mgr, - id, - v2, - OmicronZoneType::Oximeter { address }, - ) - .await; - - // First, ensure this is the right kind of error. - let err = result.unwrap_err(); - let errors = match &err { - Error::ZoneEnsure { errors } => errors, - err => panic!("unexpected result: {err:?}"), - }; - assert_eq!(errors.len(), 1); - assert_matches::assert_matches!( - errors[0].1, - Error::TimeNotSynchronized - ); - - // Ensure we have _not_ received a message about the zone's VNIC, - // because there isn't a zone. - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - // Next, ensure this still converts to an "unavail" common error - let common_err = omicron_common::api::external::Error::from(err); - assert_matches::assert_matches!( - common_err, - omicron_common::api::external::Error::ServiceUnavailable { .. } - ); - - // Should succeed: we don't care that time has not yet synchronized (for - // this specific service). 
- try_new_service_of_type( - &mgr, - id, - v2, - OmicronZoneType::InternalNtp { address }, - ) - .await - .unwrap(); - - drop_service_manager(mgr).await; - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_ensure_service_which_already_exists() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_ensure_service_which_already_exists", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v2 = Generation::new().next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - let v3 = v2.next(); - ensure_existing_service(&mgr, id, v3).await; - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v3); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // First, we will get a message about the sled being synced. - let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!( - synced_message, - metrics::Message::TimeSynced { sled_id: mgr.sled_id() } - ); - - // In this case, the manager creates the zone once, and then "ensuring" - // it a second time is a no-op. So we simply expect the same message - // sequence as starting a zone for the first time. - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - let zone_name = format!("oxz_ntp_{}", id); - assert_eq!( - vnic_message, - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into() - }, - ); - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_services_are_recreated_on_reboot() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_services_are_recreated_on_reboot", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - // First, spin up a ServiceManager, create a new zone, and then tear - // down the ServiceManager. - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v2 = Generation::new().next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - - let sled_id = mgr.sled_id(); - drop_service_manager(mgr).await; - - // First, we will get a message about the sled being synced. 
- let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!(synced_message, metrics::Message::TimeSynced { sled_id }); - - // Check that we received a message about the zone's VNIC. Since the - // manager is being dropped, it should also send a message about the - // VNIC being deleted. - let zone_name = format!("oxz_ntp_{}", id); - for expected_vnic_message in [ - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into(), - }, - metrics::Message::UntrackVnic { name: "oxControlService0".into() }, - ] { - println!( - "Expecting message from manager: {expected_vnic_message:#?}" - ); - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - assert_eq!(vnic_message, expected_vnic_message,); - } - // Note that the manager has been dropped, so we should get - // disconnected, not empty. - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Disconnected)); - - // Before we re-create the service manager - notably, using the same - // config file! - expect that a service gets initialized. - // TODO? - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // Note that the `omicron_zones_list()` request just returns the - // configured zones, stored in the on-disk ledger. There is nothing - // above that actually ensures that those zones exist, as far as I can - // tell! - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_services_do_not_persist_without_config() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_services_do_not_persist_without_config", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - // First, spin up a ServiceManager, create a new zone, and then tear - // down the ServiceManager. - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - let v1 = Generation::new(); - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - drop_service_manager(mgr).await; - - // Next, delete the ledger. This means the zone we just created will not - // be remembered on the next initialization. - std::fs::remove_file( - test_config.config_dir.path().join(ZONES_LEDGER_FILENAME), - ) - .unwrap(); - - // Observe that the old service is not re-initialized. 
- let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_bad_generations() { - // Start like the normal tests. - let logctx = - omicron_test_utils::dev::test_setup_log("test_bad_generations"); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - // Like the normal tests, set up a generation with one zone in it. - let v1 = Generation::new(); - let v2 = v1.next(); - let id1 = OmicronZoneUuid::new_v4(); - - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - let mut zones = vec![OmicronZoneConfig { - id: id1, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }]; - - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v2, - zones: zones.clone(), - }) - .await - .unwrap(); - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id1); - - // Make a new list of zones that we're going to try with a bunch of - // different generation numbers. - let id2 = OmicronZoneUuid::new_v4(); - zones.push(OmicronZoneConfig { - id: id2, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }); - - // Now try to apply that list with an older generation number. This - // shouldn't work and the reported state should be unchanged. - let error = mgr - .ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v1, - zones: zones.clone(), - }) - .await - .expect_err("unexpectedly went backwards in zones generation"); - assert!(matches!( - error, - Error::RequestedZoneConfigOutdated { requested, current } - if requested == v1 && current == v2 - )); - let found2 = mgr.omicron_zones_list().await; - assert_eq!(found, found2); - - // Now try to apply that list with the same generation number that we - // used before. This shouldn't work either. - let error = mgr - .ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v2, - zones: zones.clone(), - }) - .await - .expect_err("unexpectedly changed a single zone generation"); - assert!(matches!( - error, - Error::RequestedConfigConflicts(vr) if vr == v2 - )); - let found3 = mgr.omicron_zones_list().await; - assert_eq!(found, found3); - - // But we should be able to apply this new list of zones as long as we - // advance the generation number. 
- let v3 = v2.next(); - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v3, - zones: zones.clone(), - }) - .await - .expect("failed to remove all zones in a new generation"); - let found4 = mgr.omicron_zones_list().await; - assert_eq!(found4.generation, v3); - let mut our_zones = zones; - our_zones.sort_by(|a, b| a.id.cmp(&b.id)); - let mut found_zones = found4.zones; - found_zones.sort_by(|a, b| a.id.cmp(&b.id)); - assert_eq!(our_zones, found_zones); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } -} - #[cfg(test)] mod test { use super::*; - use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use omicron_uuid_kinds::ZpoolUuid; use sled_agent_types::zone_bundle::ZoneBundleMetadata; #[test] @@ -5978,291 +3939,4 @@ mod test { &serde_json::to_string_pretty(&schema).unwrap(), ); } - - #[test] - fn test_all_zones_requests_schema() { - let schema = schemars::schema_for!(OmicronZonesConfigLocal); - expectorate::assert_contents( - "../schema/all-zones-requests.json", - &serde_json::to_string_pretty(&schema).unwrap(), - ); - } - - #[test] - fn test_fix_7229_zone_config_reconciliation() { - fn make_omicron_zone_config( - filesystem_pool: Option<&ZpoolName>, - ) -> OmicronZoneConfig { - OmicronZoneConfig { - id: OmicronZoneUuid::new_v4(), - filesystem_pool: filesystem_pool.cloned(), - zone_type: OmicronZoneType::Oximeter { - address: "[::1]:0".parse().unwrap(), - }, - image_source: OmicronZoneImageSource::InstallDataset, - } - } - - let logctx = - omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let log = &logctx.log; - - let some_zpools = (0..10) - .map(|_| ZpoolName::new_external(ZpoolUuid::new_v4())) - .collect::>(); - - // Test 1: We have some zones; the new config makes no changes. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(Some(&some_zpools[1])), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(Some(&some_zpools[2])), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing.iter().map(|(zone, _)| zone.clone()).collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!(reconciled.zones_to_be_removed, HashSet::new()); - assert_eq!(reconciled.zones_to_be_added, HashSet::new()); - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - new_request.zones, - ); - } - - // Test 2: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix). 
- { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - zone.filesystem_pool = Some(some_zpools[i]); - zone - }) - .collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!(reconciled.zones_to_be_removed, HashSet::new()); - assert_eq!(reconciled.zones_to_be_added, HashSet::new()); - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - new_request.zones, - ); - } - - // Test 3: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix) but also changes - // something else in the config for the final zone; we should attempt to - // remove and re-add that final zone. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - zone.filesystem_pool = Some(some_zpools[i]); - if i == 2 { - zone.zone_type = OmicronZoneType::Oximeter { - address: "[::1]:10000".parse().unwrap(), - }; - } - zone - }) - .collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!( - reconciled.zones_to_be_removed, - HashSet::from([existing[2].0.clone()]), - ); - assert_eq!( - reconciled.zones_to_be_added, - HashSet::from([new_request.zones[2].clone()]), - ); - // The first two existing zones should have been updated to match - // the new request. - assert_eq!( - Vec::from_iter(existing[..2].iter().map(|(z, _)| z.clone())), - &new_request.zones[..2], - ); - } - - // Test 4: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix), except the new pool - // on the final zone is incorrect. We should get an error back. 
- { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let existing_orig = - existing.iter().map(|(z, _)| z.clone()).collect::>(); - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - if i < 2 { - zone.filesystem_pool = Some(some_zpools[i]); - } else { - zone.filesystem_pool = Some(some_zpools[4]); - } - zone - }) - .collect(), - }; - let err = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect_err("should not have reconciled successfully"); - - match err { - Error::InvalidFilesystemPoolZoneConfig { - zone_id, - expected_pool, - got_pool, - } => { - assert_eq!(zone_id, existing[2].0.id); - assert_eq!(expected_pool, some_zpools[2]); - assert_eq!(got_pool, some_zpools[4]); - } - _ => panic!("unexpected error: {err}"), - } - // reconciliation failed, so the contents of our existing configs - // should not have changed (even though a couple of the changes - // were okay, we should either take all or none to maintain - // consistency with the generation-tagged OmicronZonesConfig) - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - existing_orig, - ); - } - - // Test 5: We have some zones. The new config applies #7229 fix to the - // first zone, doesn't include the remaining zones, and adds some new - // zones. We should see "the remaining zones" removed, the "new zones" - // added, and the 7229-fixed zone not in either set. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - { - let mut z = existing[0].0.clone(); - z.filesystem_pool = Some(some_zpools[0]); - z - }, - make_omicron_zone_config(None), - make_omicron_zone_config(None), - ], - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - - assert_eq!( - reconciled.zones_to_be_removed, - HashSet::from_iter( - existing[1..].iter().map(|(z, _)| z.clone()) - ), - ); - assert_eq!( - reconciled.zones_to_be_added, - HashSet::from_iter(new_request.zones[1..].iter().cloned()), - ); - // Only the first existing zone is being kept; ensure it matches the - // new request. 
- assert_eq!(existing[0].0, new_request.zones[0]); - } - logctx.cleanup_successful(); - } } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 7b90e089345..a84cf857d8e 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -26,7 +26,6 @@ use dropshot::TypedBody; use dropshot::endpoint; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::OmicronSledConfig; -use nexus_sled_agent_shared::inventory::OmicronSledConfigResult; use nexus_sled_agent_shared::inventory::SledRole; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledVmmState; @@ -36,8 +35,6 @@ use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; -use omicron_common::disk::DatasetsConfig; -use omicron_common::disk::OmicronPhysicalDisksConfig; use range_requests::PotentialRange; use sled_agent_api::*; use sled_agent_types::boot_disk::BootDiskOsWriteStatus; @@ -340,28 +337,14 @@ impl SledAgentApi for SledAgentSimImpl { )) } - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list()?)) - } - - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list()?)) - } - async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - let result = sa.set_omicron_config(body_args)?; - Ok(HttpResponseOk(result)) + sa.set_omicron_config(body_args)?; + Ok(HttpResponseUpdatedNoContent()) } async fn sled_add( @@ -658,12 +641,6 @@ impl SledAgentApi for SledAgentSimImpl { method_unimplemented() } - async fn zpools_get( - _rqctx: RequestContext, - ) -> Result>, HttpError> { - method_unimplemented() - } - async fn sled_role_get( _rqctx: RequestContext, ) -> Result, HttpError> { diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 61adf7e0c59..46fdaa6a356 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -25,8 +25,8 @@ use dropshot::HttpError; use futures::Stream; use nexus_sled_agent_shared::inventory::{ ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, - InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronSledConfigResult, - OmicronZonesConfig, SledRole, + InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronZonesConfig, + SledRole, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -913,7 +913,9 @@ impl SledAgent { pub fn set_omicron_config( &self, config: OmicronSledConfig, - ) -> Result { + ) -> Result<(), HttpError> { + // TODO Update the simulator to work on `OmicronSledConfig` instead of + // the three separate legacy configs let disks_config = OmicronPhysicalDisksConfig { generation: config.generation, disks: config.disks.into_iter().collect(), @@ -926,16 +928,14 @@ impl SledAgent { generation: config.generation, zones: config.zones.into_iter().collect(), }; - let (disks, datasets) = { - let mut storage = self.storage.lock(); - let DisksManagementResult { status: disks } = - storage.omicron_physical_disks_ensure(disks_config)?; - let DatasetsManagementResult { status: datasets } = - 
storage.datasets_ensure(datasets_config)?; - (disks, datasets) - }; + + let mut storage = self.storage.lock(); + let _ = storage.omicron_physical_disks_ensure(disks_config)?; + let _ = storage.datasets_ensure(datasets_config)?; *self.fake_zones.lock().unwrap() = zones_config; - Ok(OmicronSledConfigResult { disks, datasets }) + //*self.sled_config.lock().unwrap() = Some(config); + + Ok(()) } pub fn omicron_zones_list(&self) -> OmicronZonesConfig { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index cff825ba474..240d00d838b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -11,19 +11,18 @@ use crate::bootstrap::early_networking::EarlyNetworkSetupError; use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::long_running_tasks::LongRunningTaskHandles; -use crate::metrics::MetricsManager; +use crate::metrics::{MetricsManager, MetricsRequestQueue}; use crate::nexus::{ NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; -use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; -use crate::storage_monitor::StorageMonitorHandle; use crate::support_bundle::logs::SupportBundleLogs; use crate::support_bundle::storage::SupportBundleManager; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; -use crate::zone_bundle; use crate::zone_bundle::BundleError; +use crate::zone_bundle::{self, ZoneBundler}; +use anyhow::anyhow; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use derive_more::From; @@ -31,10 +30,11 @@ use dropshot::HttpError; use futures::StreamExt; use futures::stream::FuturesUnordered; use illumos_utils::opte::PortManager; +use illumos_utils::running_zone::RunningZone; +use illumos_utils::zpool::PathInPool; +use itertools::Itertools as _; use nexus_sled_agent_shared::inventory::{ - ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, - InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronSledConfigResult, - OmicronZonesConfig, SledRole, + Inventory, OmicronSledConfig, OmicronZoneConfig, SledRole, }; use omicron_common::address::{ Ipv6Subnet, SLED_PREFIX, get_sled_address, get_switch_zone_address, @@ -49,13 +49,14 @@ use omicron_common::api::internal::shared::{ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; -use omicron_common::disk::{ - DatasetsConfig, DatasetsManagementResult, DisksManagementResult, - OmicronPhysicalDisksConfig, -}; +use omicron_common::disk::M2Slot; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; -use sled_agent_api::Zpool; +use sled_agent_config_reconciler::{ + ConfigReconcilerHandle, ConfigReconcilerSpawnToken, InternalDisksReceiver, + LedgerNewConfigError, LedgerTaskError, ReconcilerInventory, + SledAgentArtifactStore, SledAgentFacilities, TimeSyncStatus, +}; use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::instance::{ @@ -66,23 +67,25 @@ use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; use sled_agent_types::time_sync::TimeSync; use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, - PriorityOrder, StorageLimit, ZoneBundleMetadata, + PriorityOrder, StorageLimit, ZoneBundleCause, ZoneBundleMetadata, }; use sled_diagnostics::SledDiagnosticsCmdError; use 
sled_diagnostics::SledDiagnosticsCmdOutput; -use sled_hardware::{HardwareManager, MemoryReservations, underlay}; +use sled_hardware::{ + HardwareManager, MemoryReservations, PooledDiskError, underlay, +}; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; +use tufaceous_artifact::ArtifactHash; use uuid::Uuid; -use illumos_utils::dladm::Dladm; +use illumos_utils::dladm::{Dladm, EtherstubVnic}; use illumos_utils::zone::Api; use illumos_utils::zone::Zones; @@ -115,9 +118,6 @@ pub enum Error { #[error("Failed to operate on underlay device: {0}")] Underlay(#[from] underlay::Error), - #[error("Failed to request firewall rules")] - FirewallRequest(#[source] nexus_client::Error), - #[error(transparent)] Services(#[from] crate::services::Error), @@ -130,9 +130,6 @@ pub enum Error { #[error("Error managing storage: {0}")] Storage(#[from] sled_storage::error::Error), - #[error("Error monitoring storage: {0}")] - StorageMonitor(#[from] crate::storage_monitor::Error), - #[error("Error updating: {0}")] Download(#[from] crate::updates::Error), @@ -171,6 +168,9 @@ pub enum Error { #[error(transparent)] RepoDepotStart(#[from] crate::artifact_store::StartError), + + #[error("Time not yet synchronized")] + TimeNotSynchronized, } impl From for omicron_common::api::external::Error { @@ -296,18 +296,15 @@ pub enum InventoryError { // system. #[error(transparent)] BadByteCount(#[from] ByteCountRangeError), - #[error("failed to get current ledgered disks")] - GetDisksConfig(#[source] sled_storage::error::Error), - #[error("failed to get current ledgered datasets")] - GetDatasetsConfig(#[source] sled_storage::error::Error), + #[error(transparent)] + InventoryError(#[from] sled_agent_config_reconciler::InventoryError), } impl From for omicron_common::api::external::Error { fn from(inventory_error: InventoryError) -> Self { match inventory_error { e @ (InventoryError::BadByteCount(..) - | InventoryError::GetDisksConfig(_) - | InventoryError::GetDatasetsConfig(_)) => { + | InventoryError::InventoryError(_)) => { omicron_common::api::external::Error::internal_error( &InlineErrorChain::new(&e).to_string(), ) @@ -338,12 +335,8 @@ struct SledAgentInner { // This is used for idempotence checks during RSS/Add-Sled internal APIs start_request: StartSledAgentRequest, - // Component of Sled Agent responsible for storage and dataset management. - storage: StorageHandle, - - // Component of Sled Agent responsible for monitoring storage and updating - // dump devices. - storage_monitor: StorageMonitorHandle, + // Handle to the sled-agent-config-reconciler system. + config_reconciler: Arc, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, @@ -357,9 +350,6 @@ struct SledAgentInner { // Other Oxide-controlled services running on this Sled. services: ServiceManager, - // Connection to Nexus. - nexus_client: NexusClient, - // A mechanism for notifiying nexus about sled-agent updates nexus_notifier: NexusNotifierHandle, @@ -382,7 +372,7 @@ struct SledAgentInner { probes: ProbeManager, // Component of Sled Agent responsible for managing the artifact store. 
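
The `InventoryError` change above collapses the two ledger-specific variants into a single variant that wraps the reconciler's own `InventoryError` via `#[error(transparent)]` and `#[from]`. As a rough illustration of that `thiserror` pattern (the types below are invented stand-ins, not the real Omicron errors):

```rust
use thiserror::Error;

// Stand-in for an error type owned by another crate.
#[derive(Debug, Error)]
#[error("inventory collection failed: {reason}")]
struct InnerInventoryError {
    reason: String,
}

#[derive(Debug, Error)]
enum OuterError {
    // `transparent` forwards Display and source() to the wrapped error, and
    // `#[from]` lets `?` convert it automatically.
    #[error(transparent)]
    Inventory(#[from] InnerInventoryError),
}

fn collect() -> Result<(), OuterError> {
    let inner_result: Result<(), InnerInventoryError> =
        Err(InnerInventoryError { reason: "zpool query failed".to_string() });
    // `?` converts InnerInventoryError into OuterError::Inventory.
    inner_result?;
    Ok(())
}

fn main() {
    // Prints the inner error's message, because the wrapper is transparent.
    println!("{}", collect().unwrap_err());
}
```

The net effect is that callers see the reconciler's error text directly instead of a second layer of wrapping.
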
- repo_depot: dropshot::HttpServer>, + repo_depot: dropshot::HttpServer>>, } impl SledAgentInner { @@ -411,6 +401,7 @@ impl SledAgent { request: StartSledAgentRequest, services: ServiceManager, long_running_task_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -430,11 +421,12 @@ impl SledAgent { .cleanup_snapshots() .await; - let storage_manager = &long_running_task_handles.storage_manager; - let boot_disk = storage_manager - .get_latest_disks() - .await - .boot_disk() + let config_reconciler = + Arc::clone(&long_running_task_handles.config_reconciler); + let boot_disk_zpool = config_reconciler + .internal_disks_rx() + .current() + .boot_disk_zpool() .ok_or_else(|| Error::BootDiskNotFound)?; // Configure a swap device of the configured size before other system setup. @@ -443,7 +435,7 @@ impl SledAgent { info!(log, "Requested swap device of size {} GiB", sz); crate::swap_device::ensure_swap_device( &parent_log, - &boot_disk.1, + &boot_disk_zpool, sz, )?; } @@ -456,7 +448,8 @@ impl SledAgent { } info!(log, "Mounting backing filesystems"); - crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1).await?; + crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk_zpool) + .await?; // TODO-correctness Bootstrap-agent already ensures the underlay // etherstub and etherstub VNIC exist on startup - could it pass them @@ -549,7 +542,8 @@ impl SledAgent { nexus_client.clone(), instance_vnic_allocator, port_manager.clone(), - storage_manager.clone(), + config_reconciler.currently_managed_zpools_rx().clone(), + config_reconciler.available_datasets_rx(), long_running_task_handles.zone_bundler.clone(), vmm_reservoir_manager.clone(), metrics_manager.request_queue(), @@ -598,9 +592,30 @@ impl SledAgent { .await .expect( "Expected an infinite retry loop getting \ - network config from bootstore", + network config from bootstore", ); + let artifact_store = Arc::new( + ArtifactStore::new( + &log, + config_reconciler.internal_disks_rx().clone(), + Some(Arc::clone(&config_reconciler)), + ) + .await, + ); + + // Start reconciling against our ledgered sled config. + config_reconciler.spawn_reconciliation_task( + ReconcilerFacilities { + etherstub_vnic, + service_manager: services.clone(), + metrics_queue: metrics_manager.request_queue(), + zone_bundler: long_running_task_handles.zone_bundler.clone(), + }, + SledAgentArtifactStoreWrapper(Arc::clone(&artifact_store)), + config_reconciler_spawn_token, + ); + services .sled_agent_started( svc_config, @@ -612,14 +627,8 @@ impl SledAgent { ) .await?; - let repo_depot = ArtifactStore::new( - &log, - storage_manager.clone(), - Some(services.clone()), - ) - .await - .start(sled_address, &config.dropshot) - .await?; + let repo_depot = + artifact_store.start(sled_address, &config.dropshot).await?; // Spawn a background task for managing notifications to nexus // about this sled-agent. 
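
`SledAgent::new` now takes a `ConfigReconcilerSpawnToken` and hands it to `spawn_reconciliation_task`, so the reconciliation task can only be started by whoever holds the token, and only once. A generic sketch of that consume-once token idea (not the reconciler's actual implementation; every name below is made up):

```rust
use tokio::sync::mpsc;

/// Entitles its holder to start the background worker exactly once; it has no
/// public constructor and is consumed by value.
pub struct SpawnToken {
    rx: mpsc::Receiver<String>,
}

/// Cheap handle used to talk to the worker after it has been spawned.
pub struct Handle {
    tx: mpsc::Sender<String>,
}

/// Create the handle and its matching one-shot spawn token together.
pub fn new_pair() -> (Handle, SpawnToken) {
    let (tx, rx) = mpsc::channel(16);
    (Handle { tx }, SpawnToken { rx })
}

impl Handle {
    pub async fn send(&self, msg: &str) {
        let _ = self.tx.send(msg.to_string()).await;
    }
}

/// Consuming the token starts the worker; the token cannot be reused because
/// it has been moved.
pub fn spawn_worker(token: SpawnToken) {
    let mut rx = token.rx;
    tokio::spawn(async move {
        while let Some(msg) = rx.recv().await {
            println!("worker got: {msg}");
        }
    });
}

#[tokio::main]
async fn main() {
    let (handle, token) = new_pair();
    spawn_worker(token);
    handle.send("hello").await;
}
```

Because the token is taken by value and cannot be constructed elsewhere, a second attempt to start the task with the same token is a compile error, which is the property the spawn token is after.
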
@@ -642,27 +651,26 @@ impl SledAgent { request.body.id.into_untyped_uuid(), nexus_client.clone(), etherstub.clone(), - storage_manager.clone(), port_manager.clone(), metrics_manager.request_queue(), + config_reconciler.available_datasets_rx(), log.new(o!("component" => "ProbeManager")), ); + let currently_managed_zpools_rx = + config_reconciler.currently_managed_zpools_rx().clone(); + let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.body.id, subnet: request.body.subnet, start_request: request, - storage: long_running_task_handles.storage_manager.clone(), - storage_monitor: long_running_task_handles - .storage_monitor_handle - .clone(), + config_reconciler, instances, probes, hardware: long_running_task_handles.hardware_manager.clone(), port_manager, services, - nexus_client, nexus_notifier: nexus_notifier_handle, rack_network_config, zone_bundler: long_running_task_handles.zone_bundler.clone(), @@ -675,7 +683,7 @@ impl SledAgent { sprockets: config.sprockets.clone(), }; - sled_agent.inner.probes.run().await; + sled_agent.inner.probes.run(currently_managed_zpools_rx).await; // We immediately add a notification to the request queue about our // existence. If inspection of the hardware later informs us that we're @@ -686,66 +694,17 @@ impl SledAgent { Ok(sled_agent) } - /// Load services for which we're responsible. - /// - /// Blocks until all services have started, retrying indefinitely on - /// failure. - pub(crate) async fn load_services(&self) { - info!(self.log, "Loading cold boot services"); - retry_notify( - retry_policy_internal_service_aggressive(), - || async { - // Load as many services as we can, and don't exit immediately - // upon failure. - let load_services_result = - self.inner.services.load_services().await.map_err(|err| { - BackoffError::transient(Error::from(err)) - }); - - // If there wasn't any work to do, we're done immediately. - if matches!( - load_services_result, - Ok(services::LoadServicesResult::NoServicesToLoad) - ) { - info!( - self.log, - "load_services exiting early; no services to be loaded" - ); - return Ok(()); - } - - // Otherwise, request firewall rule updates for as many services as - // we can. Note that we still make this request even if we only - // partially load some services. - let firewall_result = self - .request_firewall_update() - .await - .map_err(|err| BackoffError::transient(err)); - - // Only complete if we have loaded all services and firewall - // rules successfully. - load_services_result.and(firewall_result) - }, - |err, delay| { - warn!( - self.log, - "Failed to load services, will retry in {:?}", delay; - "error" => ?err, - ); - }, - ) - .await - .unwrap(); // we retry forever, so this can't fail - } - /// Accesses the [SupportBundleManager] API. pub(crate) fn as_support_bundle_storage(&self) -> SupportBundleManager<'_> { - SupportBundleManager::new(&self.log, self.storage()) + SupportBundleManager::new(&self.log, &*self.inner.config_reconciler) } /// Accesses the [SupportBundleLogs] API. pub(crate) fn as_support_bundle_logs(&self) -> SupportBundleLogs<'_> { - SupportBundleLogs::new(&self.log, self.storage()) + SupportBundleLogs::new( + &self.log, + self.inner.config_reconciler.internal_disks_rx(), + ) } pub(crate) fn switch_zone_underlay_info( @@ -770,20 +729,6 @@ impl SledAgent { self.sprockets.clone() } - /// Requests firewall rules from Nexus. - /// - /// Does not retry upon failure. 
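
The support-bundle accessors above now read from `internal_disks_rx` rather than a storage handle; throughout this change, state flows to consumers over watch-style channels and each consumer takes a point-in-time snapshot when it needs one. A minimal sketch of that pattern with `tokio::sync::watch` (the `InternalDisks` struct here is a simplified stand-in for the real receiver contents):

```rust
use tokio::sync::watch;

#[derive(Clone, Debug, Default)]
struct InternalDisks {
    zpools: Vec<String>,
}

#[tokio::main]
async fn main() {
    // The producer owns the Sender; consumers hold cheap clones of the Receiver.
    let (tx, rx) = watch::channel(InternalDisks::default());

    // A consumer takes a snapshot of the current value; cloning means the
    // borrow on the channel is released immediately.
    let current: InternalDisks = rx.borrow().clone();
    assert!(current.zpools.is_empty());

    // The producer publishes a new value; later snapshots observe it.
    tx.send_replace(InternalDisks { zpools: vec!["oxp_example".to_string()] });
    assert_eq!(rx.borrow().zpools.len(), 1);
}
```

The same snapshot-then-use shape is what calls like `internal_disks_rx().current()` above appear to rely on: the snapshot is cheap and does not block the producer.
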
- async fn request_firewall_update(&self) -> Result<(), Error> { - let sled_id = self.inner.id; - - self.inner - .nexus_client - .sled_firewall_rules_request(&sled_id) - .await - .map_err(|err| Error::FirewallRequest(err))?; - Ok(()) - } - /// Trigger a request to Nexus informing it that the current sled exists, /// with information about the existing set of hardware. pub(crate) async fn notify_nexus_about_self(&self, log: &Logger) { @@ -874,212 +819,13 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } - pub async fn datasets_config_list(&self) -> Result { - Ok(self.storage().datasets_config_list().await?) - } - - async fn datasets_ensure( - &self, - config: DatasetsConfig, - ) -> Result { - info!(self.log, "datasets ensure"); - let datasets_result = self.storage().datasets_ensure(config).await?; - info!(self.log, "datasets ensure: Updated storage"); - - // TODO(https://github.com/oxidecomputer/omicron/issues/6177): - // At the moment, we don't actually remove any datasets -- this function - // just adds new datasets. - // - // Once we start removing old datasets, we should probably ensure that - // they are not longer in-use before returning (similar to - // omicron_physical_disks_ensure). - - Ok(datasets_result) - } - - /// Requests the set of physical disks currently managed by the Sled Agent. - /// - /// This should be contrasted by the set of disks in the inventory, which - /// may contain a slightly different set, if certain disks are not expected - /// to be in-use by the broader control plane. - pub async fn omicron_physical_disks_list( - &self, - ) -> Result { - Ok(self.storage().omicron_physical_disks_list().await?) - } - - /// Ensures that the specific set of Omicron Physical Disks are running - /// on this sled, and that no other disks are being used by the control - /// plane (with the exception of M.2s, which are always automatically - /// in-use). - async fn omicron_physical_disks_ensure( - &self, - config: OmicronPhysicalDisksConfig, - ) -> Result { - info!(self.log, "physical disks ensure"); - // Tell the storage subsystem which disks should be managed. - let disk_result = - self.storage().omicron_physical_disks_ensure(config).await?; - info!(self.log, "physical disks ensure: Updated storage"); - - // Grab a view of the latest set of disks, alongside a generation - // number. - // - // This generation is at LEAST as high as our last call through - // omicron_physical_disks_ensure. It may actually be higher, if a - // concurrent operation occurred. - // - // "latest_disks" has a generation number, which is important for other - // subcomponents of Sled Agent to consider. If multiple requests to - // ensure disks arrive concurrently, it's important to "only advance - // forward" as requested by Nexus. - // - // For example: if we receive the following requests concurrently: - // - Use Disks {A, B, C}, generation = 1 - // - Use Disks {A, B, C, D}, generation = 2 - // - // If we ignore generation numbers, it's possible that we start using - // "disk D" -- e.g., for instance filesystems -- and then immediately - // delete it when we process the request with "generation 1". - // - // By keeping these requests ordered, we prevent this thrashing, and - // ensure that we always progress towards the last-requested state. 
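
The removed comment block above spells out the generation-ordering invariant: configuration requests carry a generation number and the sled only moves forward, never back, so a stale request processed late cannot undo a newer one. A toy version of that guard, using a plain `u64` in place of the real `Generation` type:

```rust
/// Apply `new_gen`'s config only if it is at least as new as what we already
/// have; out-of-order (stale) requests are ignored rather than applied.
fn should_apply(current_gen: &mut u64, new_gen: u64) -> bool {
    if new_gen < *current_gen {
        // A concurrent, newer request already won; do not go backwards.
        return false;
    }
    *current_gen = new_gen;
    true
}

fn main() {
    let mut current = 0;
    // Requests arrive out of order: generation 2 first, then generation 1.
    assert!(should_apply(&mut current, 2));
    assert!(!should_apply(&mut current, 1)); // the stale request is dropped
    assert!(should_apply(&mut current, 2)); // idempotent replays are fine
}
```
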
- let latest_disks = self.storage().get_latest_disks().await; - let our_gen = latest_disks.generation(); - info!(self.log, "physical disks ensure: Propagating new generation of disks"; "generation" => ?our_gen); - - // Ensure that the StorageMonitor, and the dump devices, have committed - // to start using new disks and stop using old ones. - self.inner.storage_monitor.await_generation(*our_gen).await?; - info!(self.log, "physical disks ensure: Updated storage monitor"); - - // Ensure that the ZoneBundler, if it was creating a bundle referencing - // the old U.2s, has stopped using them. - self.inner.zone_bundler.await_completion_of_prior_bundles().await; - info!(self.log, "physical disks ensure: Updated zone bundler"); - - // Ensure that all probes, at least after our call to - // "omicron_physical_disks_ensure", stop using any disks that - // may have been in-service from before that request. - self.inner.probes.use_only_these_disks(&latest_disks).await; - info!(self.log, "physical disks ensure: Updated probes"); - - // Do the same for instances - mark them failed if they were using - // expunged disks. - self.inner.instances.use_only_these_disks(latest_disks).await?; - info!(self.log, "physical disks ensure: Updated instances"); - - Ok(disk_result) - } - /// Ensures that the specific sets of disks, datasets, and zones specified /// by `config` are running. - /// - /// This method currently blocks while each of disks, datasets, and zones - /// are ensured in that order; a failure on one prevents any attempt to - /// ensure the subsequent step(s). pub async fn set_omicron_config( &self, config: OmicronSledConfig, - ) -> Result { - // Until the config-reconciler work lands: unpack the unified config - // into the three split configs for indepenedent ledgering. - let disks_config = OmicronPhysicalDisksConfig { - generation: config.generation, - disks: config.disks.into_iter().collect(), - }; - - let disks = self.omicron_physical_disks_ensure(disks_config).await?; - - // If we only had partial success deploying disks, don't proceed. - if disks.has_error() { - return Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: Vec::new(), - }); - } - - let datasets_config = DatasetsConfig { - generation: config.generation, - datasets: config.datasets.into_iter().map(|d| (d.id, d)).collect(), - }; - - let datasets = self.datasets_ensure(datasets_config).await?; - - // If we only had partial success deploying datasets, don't proceed. - if datasets.has_error() { - return Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: datasets.status, - }); - } - - let zones_config = OmicronZonesConfig { - generation: config.generation, - zones: config.zones.into_iter().collect(), - }; - - self.omicron_zones_ensure(zones_config).await?; - - Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: datasets.status, - }) - } - - /// Ensures that the specific set of Omicron zones are running as configured - /// (and that no other zones are running) - async fn omicron_zones_ensure( - &self, - requested_zones: OmicronZonesConfig, - ) -> Result<(), Error> { - // TODO(https://github.com/oxidecomputer/omicron/issues/6043): - // - If these are the set of filesystems, we should also consider - // removing the ones which are not listed here. - // - It's probably worth sending a bulk request to the storage system, - // rather than requesting individual datasets. 
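
With the old ensure path gone, `set_omicron_config` above simply forwards to `config_reconciler.set_sled_config(...)`, whose return type (judging by the imports) nests two results: the outer `LedgerTaskError` says whether the ledger task could be reached at all, the inner `LedgerNewConfigError` says whether the new config was accepted. A hedged sketch of how a caller can treat the two layers differently (the error types below are invented for illustration):

```rust
#[derive(Debug)]
struct TaskError; // stand-in: the ledger task is gone or unreachable

#[derive(Debug)]
struct RejectedError; // stand-in: the new config itself was refused

fn set_config(accept: bool) -> Result<Result<(), RejectedError>, TaskError> {
    if accept { Ok(Ok(())) } else { Ok(Err(RejectedError)) }
}

fn main() {
    match set_config(false) {
        // Outer error: infrastructure problem; the request may be retried.
        Err(err) => eprintln!("could not reach ledger task: {err:?}"),
        // Inner error: the request was delivered but refused; retrying the
        // same config will not help.
        Ok(Err(err)) => eprintln!("config not accepted: {err:?}"),
        Ok(Ok(())) => println!("config ledgered"),
    }
}
```
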
- for zone in &requested_zones.zones { - let Some(dataset_name) = zone.dataset_name() else { - continue; - }; - - // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 - // - // However, we need to ensure that all blueprints have datasets - // within them before we can remove this back-fill. - // - // Therefore, we do something hairy here: We ensure the filesystem - // exists, but don't specify any dataset UUID value. - // - // This means that: - // - If the dataset exists and has a UUID, this will be a no-op - // - If the dataset doesn't exist, it'll be created without its - // oxide:uuid zfs property set - // - If a subsequent call to "datasets_ensure" tries to set a UUID, - // it should be able to get set (once). - self.inner.storage.upsert_filesystem(None, dataset_name).await?; - } - - self.inner - .services - .ensure_all_omicron_zones_persistent(requested_zones) - .await?; - Ok(()) - } - - /// Gets the sled's current list of all zpools. - pub async fn zpools_get(&self) -> Vec { - self.inner - .storage - .get_latest_disks() - .await - .get_all_zpools() - .into_iter() - .map(|(name, variant)| Zpool { - id: name.id(), - disk_type: variant.into(), - }) - .collect() + ) -> Result, LedgerTaskError> { + self.inner.config_reconciler.set_sled_config(config).await } /// Returns whether or not the sled believes itself to be a scrimlet @@ -1192,7 +938,7 @@ impl SledAgent { todo!("Disk attachment not yet implemented"); } - pub fn artifact_store(&self) -> &ArtifactStore { + pub fn artifact_store(&self) -> &ArtifactStore { &self.inner.repo_depot.app_private() } @@ -1249,7 +995,18 @@ impl SledAgent { /// Gets the sled's current time synchronization state pub async fn timesync_get(&self) -> Result { - self.inner.services.timesync_get().await.map_err(Error::from) + let status = self.inner.config_reconciler.timesync_status(); + + // TODO-cleanup we could give a more specific error cause in the + // `FailedToGetSyncStatus` case. + match status { + TimeSyncStatus::NotYetChecked + | TimeSyncStatus::ConfiguredToSkip + | TimeSyncStatus::FailedToGetSyncStatus(_) => { + Err(Error::TimeNotSynchronized) + } + TimeSyncStatus::TimeSync(time_sync) => Ok(time_sync), + } } pub async fn ensure_scrimlet_host_ports( @@ -1314,8 +1071,15 @@ impl SledAgent { Ok(()) } - pub(crate) fn storage(&self) -> &StorageHandle { - &self.inner.storage + pub(crate) fn boot_image_raw_devfs_path( + &self, + slot: M2Slot, + ) -> Option>> { + self.inner + .config_reconciler + .internal_disks_rx() + .current() + .image_raw_devfs_path(slot) } pub(crate) fn boot_disk_os_writer(&self) -> &BootDiskOsWriter { @@ -1359,94 +1123,14 @@ impl SledAgent { let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; - let mut disks = vec![]; - let mut zpools = vec![]; - let mut datasets = vec![]; - let (all_disks, disks_config, datasets_config, omicron_zones) = tokio::join!( - self.storage().get_latest_disks(), - self.storage().omicron_physical_disks_list(), - self.storage().datasets_config_list(), - self.inner.services.omicron_zones_list() - ); - - // RSS asks for our inventory _before_ it sends us an - // `OmicronSledConfig`; echo back the default (empty) disk and dataset - // configs if we have no ledger at all. 
- let disks_config = match disks_config { - Ok(disks_config) => disks_config, - Err(sled_storage::error::Error::LedgerNotFound) => { - OmicronPhysicalDisksConfig::default() - } - Err(err) => return Err(InventoryError::GetDisksConfig(err)), - }; - let datasets_config = match datasets_config { - Ok(datasets_config) => datasets_config, - Err(sled_storage::error::Error::LedgerNotFound) => { - DatasetsConfig::default() - } - Err(err) => return Err(InventoryError::GetDatasetsConfig(err)), - }; - - for (identity, variant, slot, firmware) in all_disks.iter_all() { - disks.push(InventoryDisk { - identity: identity.clone(), - variant, - slot, - active_firmware_slot: firmware.active_slot(), - next_active_firmware_slot: firmware.next_active_slot(), - number_of_firmware_slots: firmware.number_of_slots(), - slot1_is_read_only: firmware.slot1_read_only(), - slot_firmware_versions: firmware.slots().to_vec(), - }); - } - for zpool in all_disks.all_u2_zpools() { - let info = - match illumos_utils::zpool::Zpool::get_info(&zpool.to_string()) - .await - { - Ok(info) => info, - Err(err) => { - warn!( - self.log, - "Failed to access zpool info"; - "zpool" => %zpool, - "err" => %err - ); - continue; - } - }; - - zpools.push(InventoryZpool { - id: zpool.id(), - total_size: ByteCount::try_from(info.size())?, - }); - - let inv_props = match self.storage().datasets_list(zpool).await { - Ok(props) => { - props.into_iter().map(|prop| InventoryDataset::from(prop)) - } - Err(err) => { - warn!( - self.log, - "Failed to access dataset info within zpool"; - "zpool" => %zpool, - "err" => %err - ); - continue; - } - }; - datasets.extend(inv_props); - } - - // Reassemble our combined sled config from its separate pieces. (This - // will go away once we start ledgering the config as a single unit.) - let sled_config = OmicronSledConfig { - generation: omicron_zones.generation, - disks: disks_config.disks.into_iter().collect(), - datasets: datasets_config.datasets.into_values().collect(), - zones: omicron_zones.zones.into_iter().collect(), - remove_mupdate_override: None, - }; + let ReconcilerInventory { + disks, + zpools, + datasets, + ledgered_sled_config, + reconciler_status, + last_reconciliation, + } = self.inner.config_reconciler.inventory(&self.log).await?; Ok(Inventory { sled_id, @@ -1459,11 +1143,9 @@ impl SledAgent { disks, zpools, datasets, - // These fields will come from the reconciler once it's integrated. - // For now, we can report our ledgered config but nothing else. 
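
The new inventory code above destructures `ReconcilerInventory` field by field instead of reaching through it with `..`, so growing that struct forces every consumer to acknowledge the new field at compile time. A small illustration of why that exhaustive-destructuring style is useful (types invented for the example):

```rust
#[derive(Debug)]
struct Snapshot {
    disks: Vec<String>,
    zpools: Vec<String>,
}

#[derive(Debug)]
struct Report {
    disks: Vec<String>,
    zpools: Vec<String>,
}

fn to_report(snapshot: Snapshot) -> Report {
    // No `..` here: if Snapshot later grows a `datasets` field, this function
    // stops compiling until the new field is handled explicitly.
    let Snapshot { disks, zpools } = snapshot;
    Report { disks, zpools }
}

fn main() {
    let report = to_report(Snapshot {
        disks: vec!["disk-a".to_string()],
        zpools: vec!["oxp_a".to_string()],
    });
    println!("{report:?}");
}
```
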
- ledgered_sled_config: Some(sled_config), - reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, - last_reconciliation: None, + ledgered_sled_config, + reconciler_status, + last_reconciliation, }) } @@ -1625,3 +1307,78 @@ pub async fn sled_add( info!(log, "Peer agent initialized"; "peer_bootstrap_addr" => %bootstrap_addr, "peer_id" => %baseboard); Ok(()) } + +struct ReconcilerFacilities { + etherstub_vnic: EtherstubVnic, + service_manager: ServiceManager, + metrics_queue: MetricsRequestQueue, + zone_bundler: ZoneBundler, +} + +impl SledAgentFacilities for ReconcilerFacilities { + fn underlay_vnic(&self) -> &EtherstubVnic { + &self.etherstub_vnic + } + + async fn on_time_sync(&self) { + self.service_manager.on_time_sync().await + } + + async fn start_omicron_zone( + &self, + zone_config: &OmicronZoneConfig, + zone_root_path: PathInPool, + ) -> anyhow::Result { + let zone = self + .service_manager + .start_omicron_zone(zone_config, zone_root_path) + .await?; + Ok(zone) + } + + fn metrics_untrack_zone_links( + &self, + zone: &RunningZone, + ) -> anyhow::Result<()> { + match self.metrics_queue.untrack_zone_links(zone) { + Ok(()) => Ok(()), + Err(errors) => { + let mut errors = + errors.iter().map(|err| InlineErrorChain::new(err)); + Err(anyhow!( + "{} errors untracking zone links: {}", + errors.len(), + errors.join(", ") + )) + } + } + } + + fn ddm_remove_internal_dns_prefix(&self, prefix: Ipv6Subnet) { + self.service_manager + .ddm_reconciler() + .remove_internal_dns_subnet(prefix); + } + + async fn zone_bundle_create( + &self, + zone: &RunningZone, + cause: ZoneBundleCause, + ) -> anyhow::Result<()> { + self.zone_bundler.create(zone, cause).await?; + Ok(()) + } +} + +// Workaround wrapper for orphan rules. +struct SledAgentArtifactStoreWrapper(Arc>); + +impl SledAgentArtifactStore for SledAgentArtifactStoreWrapper { + async fn validate_artifact_exists_in_storage( + &self, + artifact: ArtifactHash, + ) -> anyhow::Result<()> { + self.0.get(artifact).await?; + Ok(()) + } +} diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs deleted file mode 100644 index 626d81d54ff..00000000000 --- a/sled-agent/src/storage_monitor.rs +++ /dev/null @@ -1,116 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! A task that listens for storage events from [`sled_storage::manager::StorageManager`] -//! and dispatches them to other parts of the bootstrap agent and sled agent -//! code. - -use omicron_common::api::external::Generation; -use sled_agent_config_reconciler::dump_setup::DumpSetup; -use sled_storage::config::MountConfig; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; -use slog::Logger; -use std::sync::Arc; -use tokio::sync::watch; - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Storage Monitor no longer running")] - NotRunning, -} - -pub struct StorageMonitor { - log: Logger, - storage_manager: StorageHandle, - - // Invokes dumpadm(8) and savecore(8) when new disks are encountered - dump_setup: DumpSetup, - - tx: watch::Sender, -} - -/// Emits status about storage monitoring. -#[derive(Debug, Clone)] -pub struct StorageMonitorStatus { - /// The latest generation of physical disks to be processed - /// by the storage monitor. 
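
The `SledAgentArtifactStoreWrapper` added above is commented as a workaround for the orphan rule: when both the trait and the type it should be implemented for live in other crates, the impl must hang off a local newtype. A generic sketch of the workaround (everything here is in one crate, so the rule is not actually in force; the comments mark where the foreign pieces would live):

```rust
use std::sync::Arc;

// Imagine this trait is defined in crate A...
trait ArtifactLookup {
    fn exists(&self, hash: &str) -> bool;
}

// ...and this type in crate B.
struct ForeignStore {
    known: Vec<String>,
}

// A local newtype is the only place a third crate may write the impl.
struct StoreWrapper(Arc<ForeignStore>);

impl ArtifactLookup for StoreWrapper {
    fn exists(&self, hash: &str) -> bool {
        // Delegate straight through to the wrapped store.
        self.0.known.iter().any(|known| known.as_str() == hash)
    }
}

fn main() {
    let store = Arc::new(ForeignStore { known: vec!["abc123".to_string()] });
    let wrapper = StoreWrapper(Arc::clone(&store));
    assert!(wrapper.exists("abc123"));
    assert!(!wrapper.exists("deadbeef"));
}
```
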
- pub latest_gen: Option, -} - -impl StorageMonitorStatus { - fn new() -> Self { - Self { latest_gen: None } - } -} - -#[derive(Clone)] -pub struct StorageMonitorHandle { - rx: watch::Receiver, -} - -impl StorageMonitorHandle { - pub async fn await_generation( - &self, - wanted: Generation, - ) -> Result<(), Error> { - self.rx - .clone() - .wait_for(|status| { - let Some(observed) = status.latest_gen else { - return false; - }; - return observed >= wanted; - }) - .await - .map_err(|_| Error::NotRunning)?; - Ok(()) - } -} - -impl StorageMonitor { - pub fn new( - log: &Logger, - mount_config: MountConfig, - storage_manager: StorageHandle, - ) -> (StorageMonitor, StorageMonitorHandle) { - let dump_setup = DumpSetup::new(&log, Arc::new(mount_config)); - let log = log.new(o!("component" => "StorageMonitor")); - let (tx, rx) = watch::channel(StorageMonitorStatus::new()); - ( - StorageMonitor { log, storage_manager, dump_setup, tx }, - StorageMonitorHandle { rx }, - ) - } - - /// Run the main receive loop of the `StorageMonitor` - /// - /// This should be spawned into a tokio task - pub async fn run(mut self) { - loop { - tokio::select! { - disks = self.storage_manager.wait_for_changes() => { - info!( - self.log, - "Received storage manager update"; - "disks" => ?disks - ); - self.handle_resource_update(disks).await; - } - } - } - } - - async fn handle_resource_update(&mut self, updated_disks: AllDisks) { - let generation = updated_disks.generation(); - self.dump_setup - .update_dumpdev_setup( - updated_disks.iter_managed().map(|(_id, disk)| disk), - ) - .await; - self.tx.send_replace(StorageMonitorStatus { - latest_gen: Some(*generation), - }); - } -} diff --git a/sled-agent/src/support_bundle/logs.rs b/sled-agent/src/support_bundle/logs.rs index 3467c8663d0..c68a3bdedde 100644 --- a/sled-agent/src/support_bundle/logs.rs +++ b/sled-agent/src/support_bundle/logs.rs @@ -7,7 +7,7 @@ use camino_tempfile::tempfile_in; use dropshot::HttpError; use range_requests::make_get_response; -use sled_storage::manager::StorageHandle; +use sled_agent_config_reconciler::InternalDisksReceiver; use slog::Logger; use slog_error_chain::InlineErrorChain; use tokio::io::AsyncSeekExt; @@ -43,12 +43,15 @@ impl From for HttpError { pub struct SupportBundleLogs<'a> { log: &'a Logger, - sled_storage: &'a StorageHandle, + internal_disks_rx: &'a InternalDisksReceiver, } impl<'a> SupportBundleLogs<'a> { - pub fn new(log: &'a Logger, sled_storage: &'a StorageHandle) -> Self { - Self { log, sled_storage } + pub fn new( + log: &'a Logger, + internal_disks_rx: &'a InternalDisksReceiver, + ) -> Self { + Self { log, internal_disks_rx } } /// Get a list of zones on a sled containing logs that we want to include in @@ -77,12 +80,9 @@ impl<'a> SupportBundleLogs<'a> { { // We are using an M.2 device for temporary storage to assemble a zip // file made up of all of the discovered zone's logs. 
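
The deleted `StorageMonitorHandle::await_generation` above shows the other half of the watch-channel pattern: blocking until the published value satisfies a condition via `Receiver::wait_for`, rather than polling. A compact, generic sketch of that wait-until-condition usage:

```rust
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    // The producer publishes the latest processed generation.
    let (tx, mut rx) = watch::channel(0u64);

    // The consumer parks until the generation reaches its target; `wait_for`
    // also checks the value already in the channel, so it cannot miss one
    // published before it started waiting.
    let waiter = tokio::spawn(async move {
        let observed = rx
            .wait_for(|generation| *generation >= 3)
            .await
            .expect("sender dropped before the target generation was reached");
        *observed
    });

    // Publish a few generations; the waiter wakes on the one it wants.
    for generation in 1..=3u64 {
        tx.send_replace(generation);
    }

    assert_eq!(waiter.await.unwrap(), 3);
}
```
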
- let m2_debug_datasets = self - .sled_storage - .get_latest_disks() - .await - .all_sled_diagnostics_directories(); - let tempdir = m2_debug_datasets.first().ok_or(Error::MissingStorage)?; + let current_internal_disks = self.internal_disks_rx.current(); + let mut m2_debug_datasets = current_internal_disks.all_debug_datasets(); + let tempdir = m2_debug_datasets.next().ok_or(Error::MissingStorage)?; let mut tempfile = tempfile_in(tempdir)?; let log = self.log.clone(); diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 4e3d781e146..34e6bf37480 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -28,15 +28,20 @@ use range_requests::PotentialRange; use range_requests::SingleRange; use sha2::{Digest, Sha256}; use sled_agent_api::*; +use sled_agent_config_reconciler::ConfigReconcilerHandle; +use sled_agent_config_reconciler::DatasetTaskError; +use sled_agent_config_reconciler::InventoryError; +use sled_agent_config_reconciler::NestedDatasetDestroyError; +use sled_agent_config_reconciler::NestedDatasetEnsureError; +use sled_agent_config_reconciler::NestedDatasetListError; +use sled_agent_config_reconciler::NestedDatasetMountError; use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::borrow::Cow; use std::io::Write; use tokio::io::AsyncReadExt; use tokio::io::AsyncSeekExt; @@ -86,6 +91,24 @@ pub enum Error { #[error(transparent)] Zip(#[from] ZipError), + + #[error(transparent)] + DatasetTask(#[from] DatasetTaskError), + + #[error("Could not access ledgered sled config")] + LedgeredSledConfig(#[source] InventoryError), + + #[error(transparent)] + NestedDatasetMountError(#[from] NestedDatasetMountError), + + #[error(transparent)] + NestedDatasetEnsureError(#[from] NestedDatasetEnsureError), + + #[error(transparent)] + NestedDatasetDestroyError(#[from] NestedDatasetDestroyError), + + #[error(transparent)] + NestedDatasetListError(#[from] NestedDatasetListError), } fn err_str(err: &dyn std::error::Error) -> String { @@ -144,13 +167,12 @@ pub trait LocalStorage: Sync { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result; /// Returns all nested datasets within an existing dataset async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error>; @@ -165,19 +187,28 @@ pub trait LocalStorage: Sync { &self, name: NestedDatasetLocation, ) -> Result<(), Error>; - - /// Returns the root filesystem path where datasets are mounted. - /// - /// This is typically "/" in prod, but can be a temporary directory - /// for tests to isolate storage that typically appears globally. 
- fn zpool_mountpoint_root(&self) -> Cow; } /// This implementation is effectively a pass-through to the real methods #[async_trait] -impl LocalStorage for StorageHandle { +impl LocalStorage for ConfigReconcilerHandle { async fn dyn_datasets_config_list(&self) -> Result { - self.datasets_config_list().await.map_err(|err| err.into()) + // TODO-cleanup This is super gross; add a better API (maybe fetch a + // single dataset by ID, since that's what our caller wants?) + let sled_config = + self.ledgered_sled_config().map_err(Error::LedgeredSledConfig)?; + let sled_config = match sled_config { + Some(config) => config, + None => return Ok(DatasetsConfig::default()), + }; + Ok(DatasetsConfig { + generation: sled_config.generation, + datasets: sled_config + .datasets + .into_iter() + .map(|d| (d.id, d)) + .collect(), + }) } async fn dyn_dataset_get( @@ -205,38 +236,42 @@ impl LocalStorage for StorageHandle { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result { - dataset - .ensure_mounted_and_get_mountpoint(mount_root) + self.nested_dataset_ensure_mounted(dataset) .await + .map_err(Error::from)? .map_err(Error::from) } async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error> { - self.nested_dataset_list(name, options).await.map_err(|err| err.into()) + self.nested_dataset_list(name, options) + .await + .map_err(Error::from)? + .map_err(Error::from) } async fn dyn_nested_dataset_ensure( &self, config: NestedDatasetConfig, ) -> Result<(), Error> { - self.nested_dataset_ensure(config).await.map_err(|err| err.into()) + self.nested_dataset_ensure(config) + .await + .map_err(Error::from)? + .map_err(Error::from) } async fn dyn_nested_dataset_destroy( &self, name: NestedDatasetLocation, ) -> Result<(), Error> { - self.nested_dataset_destroy(name).await.map_err(|err| err.into()) - } - - fn zpool_mountpoint_root(&self) -> Cow { - Cow::Borrowed(self.mount_config().root.as_path()) + self.nested_dataset_destroy(name) + .await + .map_err(Error::from)? + .map_err(Error::from) } } @@ -257,18 +292,23 @@ impl LocalStorage for crate::sim::Storage { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result { + let slf = self.lock(); // Simulated storage treats all datasets as mounted. 
- Ok(dataset.mountpoint(mount_root)) + Ok(dataset.mountpoint(slf.root())) } async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error> { - self.lock().nested_dataset_list(name, options).map_err(|err| err.into()) + self.lock() + .nested_dataset_list( + NestedDatasetLocation { path: String::new(), root: name }, + options, + ) + .map_err(|err| err.into()) } async fn dyn_nested_dataset_ensure( @@ -284,10 +324,6 @@ impl LocalStorage for crate::sim::Storage { ) -> Result<(), Error> { self.lock().nested_dataset_destroy(name).map_err(|err| err.into()) } - - fn zpool_mountpoint_root(&self) -> Cow { - Cow::Owned(self.lock().root().to_path_buf()) - } } /// Describes the type of access to the support bundle @@ -489,12 +525,10 @@ impl<'a> SupportBundleManager<'a> { ) -> Result, Error> { let root = self.get_mounted_dataset_config(zpool_id, dataset_id).await?.name; - let dataset_location = - NestedDatasetLocation { path: String::from(""), root }; let datasets = self .storage .dyn_nested_dataset_list( - dataset_location, + root, NestedDatasetListOptions::ChildrenOnly, ) .await?; @@ -512,10 +546,7 @@ impl<'a> SupportBundleManager<'a> { // The dataset for a support bundle exists. let support_bundle_path = self .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset.name, - &self.storage.zpool_mountpoint_root(), - ) + .dyn_ensure_mounted_and_get_mountpoint(dataset.name) .await? .join(BUNDLE_FILE_NAME); @@ -625,13 +656,8 @@ impl<'a> SupportBundleManager<'a> { info!(log, "Dataset does exist for bundle"); // The mounted root of the support bundle dataset - let support_bundle_dir = self - .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset, - &self.storage.zpool_mountpoint_root(), - ) - .await?; + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let support_bundle_path = support_bundle_dir.join(BUNDLE_FILE_NAME); let support_bundle_path_tmp = support_bundle_dir.join(format!( "{}-{BUNDLE_TMP_FILE_NAME_SUFFIX}", @@ -737,13 +763,8 @@ impl<'a> SupportBundleManager<'a> { NestedDatasetLocation { path: support_bundle_id.to_string(), root }; // The mounted root of the support bundle dataset - let support_bundle_dir = self - .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset, - &self.storage.zpool_mountpoint_root(), - ) - .await?; + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let path = support_bundle_dir.join(BUNDLE_FILE_NAME); let f = tokio::fs::File::open(&path).await?; @@ -944,11 +965,84 @@ mod tests { use omicron_common::disk::DatasetsConfig; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; + use sled_storage::manager::StorageHandle; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::collections::BTreeMap; use zip::ZipWriter; use zip::write::SimpleFileOptions; + // TODO-cleanup Should we rework these tests to not use StorageHandle (real + // code now goes through `ConfigReconcilerHandle`)? 
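
The TODO above asks whether these tests should keep going through `StorageHandle` now that production code uses `ConfigReconcilerHandle`; in the meantime, the arrangement is to keep a test-only implementation of the storage trait so the existing harness can still drive the production logic. A generic sketch of that test-double setup (trait and type names are placeholders):

```rust
/// The narrow interface the production code consumes.
trait Storage {
    fn dataset_names(&self) -> Vec<String>;
}

/// Production implementation (stand-in for the reconciler-backed handle).
struct RealStorage;

impl Storage for RealStorage {
    fn dataset_names(&self) -> Vec<String> {
        vec!["crypt/debug".to_string()]
    }
}

/// Production logic written against the trait, not a concrete handle.
fn count_datasets(storage: &dyn Storage) -> usize {
    storage.dataset_names().len()
}

fn main() {
    println!("{} datasets", count_datasets(&RealStorage));
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Test-only implementation backed by an in-memory harness, so the tests
    /// can exercise `count_datasets` without touching real storage.
    struct FakeStorage {
        names: Vec<String>,
    }

    impl Storage for FakeStorage {
        fn dataset_names(&self) -> Vec<String> {
            self.names.clone()
        }
    }

    #[test]
    fn counts_datasets() {
        let fake = FakeStorage { names: vec!["a".into(), "b".into()] };
        assert_eq!(count_datasets(&fake), 2);
    }
}
```
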
+ #[async_trait] + impl LocalStorage for StorageHandle { + async fn dyn_datasets_config_list( + &self, + ) -> Result { + self.datasets_config_list().await.map_err(|err| err.into()) + } + + async fn dyn_dataset_get( + &self, + dataset_name: &String, + ) -> Result { + let Some(dataset) = + illumos_utils::zfs::Zfs::get_dataset_properties( + &[dataset_name.clone()], + illumos_utils::zfs::WhichDatasets::SelfOnly, + ) + .await + .map_err(|err| Error::DatasetLookup(err))? + .pop() + else { + // This should not be possible, unless the "zfs get" command is + // behaving unpredictably. We're only asking for a single dataset, + // so on success, we should see the result of that dataset. + return Err(Error::DatasetLookup(anyhow::anyhow!( + "Zfs::get_dataset_properties returned an empty vec?" + ))); + }; + + Ok(dataset) + } + + async fn dyn_ensure_mounted_and_get_mountpoint( + &self, + dataset: NestedDatasetLocation, + ) -> Result { + dataset + .ensure_mounted_and_get_mountpoint(&self.mount_config().root) + .await + .map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_list( + &self, + name: DatasetName, + options: NestedDatasetListOptions, + ) -> Result, Error> { + self.nested_dataset_list( + NestedDatasetLocation { path: String::new(), root: name }, + options, + ) + .await + .map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), Error> { + self.nested_dataset_ensure(config).await.map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), Error> { + self.nested_dataset_destroy(name).await.map_err(|err| err.into()) + } + } + struct SingleU2StorageHarness { storage_test_harness: StorageManagerTestHarness, zpool_id: ZpoolUuid, diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 24a13b8b2a2..5df159a6c8b 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -27,9 +27,9 @@ use illumos_utils::zfs::Snapshot; use illumos_utils::zfs::ZFS; use illumos_utils::zfs::Zfs; use illumos_utils::zone::AdmError; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::zone_bundle::*; -use sled_storage::dataset::U2_DEBUG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -139,7 +139,8 @@ pub struct ZoneBundler { // State shared between tasks, e.g., used when creating a bundle in different // tasks or between a creation and cleanup. struct Inner { - storage_handle: StorageHandle, + internal_disks_rx: InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, cleanup_context: CleanupContext, last_cleanup_at: Instant, } @@ -167,11 +168,11 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. async fn bundle_directories(&self) -> Vec { - let resources = self.storage_handle.get_latest_disks().await; // NOTE: These bundle directories are always stored on M.2s, so we don't // need to worry about synchronizing with U.2 disk expungement at the // callsite. 
- let expected = resources.all_zone_bundle_directories(); + let internal_disks = self.internal_disks_rx.current(); + let expected = internal_disks.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -236,7 +237,8 @@ impl ZoneBundler { /// to clean them up to free up space. pub async fn new( log: Logger, - storage_handle: StorageHandle, + internal_disks_rx: InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, cleanup_context: CleanupContext, ) -> Self { // This is compiled out in tests because there's no way to set our @@ -255,7 +257,8 @@ impl ZoneBundler { .expect("Failed to initialize existing ZFS resources"); let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { - storage_handle, + internal_disks_rx, + available_datasets_rx, cleanup_context, last_cleanup_at: Instant::now(), })); @@ -356,9 +359,9 @@ impl ZoneBundler { // prior bundles have completed. let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let resources = inner.storage_handle.get_latest_disks().await; - let extra_log_dirs = resources - .all_u2_mountpoints(U2_DEBUG_DATASET) + let extra_log_dirs = inner + .available_datasets_rx + .all_mounted_debug_datasets() .into_iter() .map(|pool_path| pool_path.path) .collect(); @@ -1766,7 +1769,10 @@ mod illumos_tests { use chrono::TimeZone; use chrono::Timelike; use chrono::Utc; + use omicron_common::disk::DiskIdentity; use rand::RngCore; + use sled_agent_config_reconciler::AvailableDatasetsReceiver; + use sled_agent_config_reconciler::InternalDisksReceiver; use sled_storage::manager_test_harness::StorageManagerTestHarness; use slog::Drain; use slog::Logger; @@ -1891,9 +1897,38 @@ mod illumos_tests { let log = test_logger(); let context = CleanupContext::default(); let resource_wrapper = ResourceWrapper::new(&log).await; + let handle = resource_wrapper.storage_test_harness.handle(); + let all_disks = handle.get_latest_disks().await; + + // Convert from StorageManagerTestHarness to config-reconciler channels. + // Do we want to expand config-reconciler test support and not use + // StorageManagerTestHarness? 
+ let internal_disks_rx = InternalDisksReceiver::fake_static( + Arc::new(all_disks.mount_config().clone()), + all_disks.all_m2_zpools().into_iter().enumerate().map( + |(i, zpool)| { + ( + DiskIdentity { + vendor: format!("test-vendor-{i}"), + model: format!("test-model-{i}"), + serial: format!("test-serial-{i}"), + }, + zpool, + ) + }, + ), + ); + let available_datasets_rx = AvailableDatasetsReceiver::fake_static( + all_disks + .all_m2_zpools() + .into_iter() + .zip(all_disks.all_m2_mountpoints(".")), + ); + let bundler = ZoneBundler::new( log, - resource_wrapper.storage_test_harness.handle().clone(), + internal_disks_rx, + available_datasets_rx, context, ) .await; diff --git a/sled-agent/zone-images/Cargo.toml b/sled-agent/zone-images/Cargo.toml index c4227f0fc7d..1edba3dfa2b 100644 --- a/sled-agent/zone-images/Cargo.toml +++ b/sled-agent/zone-images/Cargo.toml @@ -16,6 +16,7 @@ nexus-sled-agent-shared.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true serde_json.workspace = true +sled-agent-config-reconciler.workspace = true sled-storage.workspace = true slog.workspace = true slog-error-chain.workspace = true @@ -26,3 +27,4 @@ camino-tempfile-ext.workspace = true dropshot.workspace = true omicron-uuid-kinds.workspace = true pretty_assertions.workspace = true +sled-agent-config-reconciler = { workspace = true, features = ["testing"] } diff --git a/sled-agent/zone-images/src/mupdate_override.rs b/sled-agent/zone-images/src/mupdate_override.rs index bbfb132f9c2..ed9f56a0b7a 100644 --- a/sled-agent/zone-images/src/mupdate_override.rs +++ b/sled-agent/zone-images/src/mupdate_override.rs @@ -11,7 +11,6 @@ use std::fs::FileType; use std::io; use std::sync::Arc; -use crate::ZoneImageZpools; use camino::Utf8Path; use camino::Utf8PathBuf; use iddqd::IdOrdItem; @@ -19,7 +18,7 @@ use iddqd::IdOrdMap; use iddqd::id_upcast; use illumos_utils::zpool::ZpoolName; use omicron_common::update::MupdateOverrideInfo; -use sled_storage::dataset::INSTALL_DATASET; +use sled_agent_config_reconciler::InternalDisksWithBootDisk; use slog::debug; use slog::error; use slog::info; @@ -59,11 +58,9 @@ impl AllMupdateOverrides { /// be authoritative). Consider extracting this out into something generic. pub(crate) fn read_all( log: &slog::Logger, - zpools: &ZoneImageZpools<'_>, - boot_zpool: &ZpoolName, + internal_disks: InternalDisksWithBootDisk, ) -> Self { - let dataset = - boot_zpool.dataset_mountpoint(zpools.root, INSTALL_DATASET); + let dataset = internal_disks.boot_disk_install_dataset(); let (boot_disk_path, boot_disk_res) = read_mupdate_override(log, &dataset); @@ -71,18 +68,12 @@ impl AllMupdateOverrides { // Now read the file from all other disks. We attempt to make sure they // match up and will log a warning if they don't, though (until we have // a better story on transient failures) it's not fatal. 
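
Per the comment above, the override file is read from every internal disk and the non-boot copies are compared against the boot disk's copy, with mismatches logged rather than treated as fatal. A small sketch of that compare-and-report step, simplified to plain strings (the real code compares parsed override data, and the names here are made up):

```rust
/// Compare the value read from the boot disk against the values read from the
/// other internal disks, returning the disks that disagree so the caller can
/// log a warning (mismatches are surprising but not fatal).
fn find_mismatches(
    boot_value: Option<&str>,
    non_boot: &[(&str, Option<&str>)], // (zpool name, value read from it)
) -> Vec<String> {
    non_boot
        .iter()
        .filter(|(_, value)| *value != boot_value)
        .map(|(zpool, value)| {
            format!("{zpool}: expected {boot_value:?}, found {value:?}")
        })
        .collect()
}

fn main() {
    let mismatches = find_mismatches(
        Some("override-1"),
        &[("oxp_a", Some("override-1")), ("oxp_b", None)],
    );
    // Only the disk whose contents differ from the boot disk is reported.
    assert_eq!(mismatches.len(), 1);
    println!("{}", mismatches[0]);
}
```
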
- let non_boot_zpools = zpools - .all_m2_zpools - .iter() - .filter(|&zpool_name| zpool_name != boot_zpool); - let non_boot_disks_overrides = non_boot_zpools - .map(|zpool_name| { - let dataset = - zpool_name.dataset_mountpoint(zpools.root, INSTALL_DATASET); - + let non_boot_datasets = internal_disks.non_boot_disk_install_datasets(); + let non_boot_disks_overrides = non_boot_datasets + .map(|(zpool_name, dataset)| { let (path, res) = read_mupdate_override(log, &dataset); MupdateOverrideNonBootInfo { - zpool_name: *zpool_name, + zpool_name, path, result: MupdateOverrideNonBootResult::new( res, @@ -93,7 +84,7 @@ impl AllMupdateOverrides { .collect(); let ret = Self { - boot_zpool: *boot_zpool, + boot_zpool: internal_disks.boot_disk_zpool(), boot_disk_path, boot_disk_override: boot_disk_res, non_boot_disk_overrides: non_boot_disks_overrides, @@ -486,9 +477,12 @@ mod tests { use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use dropshot::test_util::LogContext; + use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::MupdateOverrideUuid; use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; + use sled_agent_config_reconciler::InternalDisksReceiver; + use sled_storage::config::MountConfig; use std::collections::BTreeSet; use std::io; use std::sync::LazyLock; @@ -539,6 +533,33 @@ mod tests { static OVERRIDE_2_UUID: MupdateOverrideUuid = MupdateOverrideUuid::from_u128(0x20588f8f_c680_4101_afc7_820226d03ada); + fn make_internal_disks( + root: &Utf8Path, + boot_zpool: ZpoolName, + other_zpools: &[ZpoolName], + ) -> InternalDisksWithBootDisk { + let identity_from_zpool = |zpool: ZpoolName| DiskIdentity { + vendor: "mupdate-override-tests".to_string(), + model: "fake-disk".to_string(), + serial: zpool.id().to_string(), + }; + let mount_config = MountConfig { + root: root.to_path_buf(), + synthetic_disk_root: root.to_path_buf(), + }; + InternalDisksReceiver::fake_static( + Arc::new(mount_config), + std::iter::once((identity_from_zpool(boot_zpool), boot_zpool)) + .chain( + other_zpools + .iter() + .copied() + .map(|pool| (identity_from_zpool(pool), pool)), + ), + ) + .current_with_boot_disk() + } + /// Boot disk present / no other disks. (This produces a warning, but is /// otherwise okay.) 
#[test] @@ -554,12 +575,9 @@ mod tests { .write_str(&serde_json::to_string(&override_info).unwrap()) .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL], - }; + let internal_disks = make_internal_disks(dir.path(), BOOT_ZPOOL, &[]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info) @@ -586,13 +604,11 @@ mod tests { .write_str(&serde_json::to_string(&override_info).unwrap()) .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info) @@ -624,13 +640,11 @@ mod tests { dir.child(&BOOT_PATHS.install_dataset).create_dir_all().unwrap(); dir.child(&NON_BOOT_PATHS.install_dataset).create_dir_all().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), None, @@ -665,13 +679,11 @@ mod tests { // Create the directory, but not the override JSON within it. dir.child(&NON_BOOT_PATHS.install_dataset).create_dir_all().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info) @@ -709,12 +721,10 @@ mod tests { .write_str(&serde_json::to_string(&override_info).unwrap()) .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), None, @@ -755,12 +765,10 @@ mod tests { .write_str(&serde_json::to_string(&override_info_2).unwrap()) .expect("failed to write override json"); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info), @@ -801,12 +809,10 @@ mod tests { .create_dir_all() .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + 
make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap_err(), &dataset_missing_error( @@ -844,12 +850,10 @@ mod tests { dir.child(&BOOT_PATHS.install_dataset).touch().unwrap(); dir.child(&NON_BOOT_PATHS.install_dataset).touch().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap_err(), &dataset_not_dir_error( @@ -895,17 +899,13 @@ mod tests { // Read error (empty file). dir.child(&NON_BOOT_3_PATHS.override_json).touch().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![ - BOOT_ZPOOL, - NON_BOOT_ZPOOL, - NON_BOOT_2_ZPOOL, - NON_BOOT_3_ZPOOL, - ], - }; + let internal_disks = make_internal_disks( + dir.path(), + BOOT_ZPOOL, + &[NON_BOOT_ZPOOL, NON_BOOT_2_ZPOOL, NON_BOOT_3_ZPOOL], + ); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap_err(), &deserialize_error(dir.path(), &BOOT_PATHS.override_json, "",), diff --git a/sled-agent/zone-images/src/source_resolver.rs b/sled-agent/zone-images/src/source_resolver.rs index 576e1676ad0..0ef868960ae 100644 --- a/sled-agent/zone-images/src/source_resolver.rs +++ b/sled-agent/zone-images/src/source_resolver.rs @@ -5,12 +5,10 @@ //! Zone image lookup. use crate::AllMupdateOverrides; -use camino::Utf8Path; use camino::Utf8PathBuf; -use illumos_utils::zpool::ZpoolName; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; -use sled_storage::dataset::INSTALL_DATASET; -use sled_storage::dataset::M2_ARTIFACT_DATASET; +use sled_agent_config_reconciler::InternalDisks; +use sled_agent_config_reconciler::InternalDisksWithBootDisk; use slog::o; use std::sync::Arc; use std::sync::Mutex; @@ -29,16 +27,6 @@ pub struct ZoneImageFileSource { pub search_paths: Vec, } -/// A description of zpools to examine for zone images. -pub struct ZoneImageZpools<'a> { - /// The root directory, typically `/`. - pub root: &'a Utf8Path, - - /// The full set of M.2 zpools that are currently known. Must be non-empty, - /// but it can include the boot zpool. - pub all_m2_zpools: Vec, -} - /// Resolves [`OmicronZoneImageSource`] instances into file names and search /// paths. /// @@ -53,33 +41,25 @@ impl ZoneImageSourceResolver { /// Creates a new `ZoneImageSourceResolver`. pub fn new( log: &slog::Logger, - zpools: &ZoneImageZpools<'_>, - boot_zpool: &ZpoolName, + internal_disks: InternalDisksWithBootDisk, ) -> Self { Self { inner: Arc::new(Mutex::new(ResolverInner::new( - log, zpools, boot_zpool, + log, + internal_disks, ))), } } - /// Overrides the image directory with another one. - /// - /// Intended for testing. - pub fn override_image_directory(&self, path: Utf8PathBuf) { - self.inner.lock().unwrap().override_image_directory(path); - } - /// Returns a [`ZoneImageFileSource`] consisting of the file name, plus a /// list of potential paths to search, for a zone image. 
pub fn file_source_for( &self, image_source: &OmicronZoneImageSource, - zpools: &ZoneImageZpools<'_>, - boot_zpool: Option<&ZpoolName>, + internal_disks: InternalDisks, ) -> ZoneImageFileSource { let inner = self.inner.lock().unwrap(); - inner.file_source_for(image_source, zpools, boot_zpool) + inner.file_source_for(image_source, internal_disks) } } @@ -99,39 +79,20 @@ struct ResolverInner { impl ResolverInner { fn new( log: &slog::Logger, - zpools: &ZoneImageZpools<'_>, - boot_zpool: &ZpoolName, + internal_disks: InternalDisksWithBootDisk, ) -> Self { let log = log.new(o!("component" => "ZoneImageSourceResolver")); let mupdate_overrides = - AllMupdateOverrides::read_all(&log, zpools, boot_zpool); + AllMupdateOverrides::read_all(&log, internal_disks); Self { log, image_directory_override: None, mupdate_overrides } } - fn override_image_directory( - &mut self, - image_directory_override: Utf8PathBuf, - ) { - if let Some(dir) = &self.image_directory_override { - // Allow idempotent sets to the same directory -- some tests do - // this. - if image_directory_override != *dir { - panic!( - "image_directory_override already set to `{dir}`, \ - attempting to set it to `{image_directory_override}`" - ); - } - } - self.image_directory_override = Some(image_directory_override); - } - fn file_source_for( &self, image_source: &OmicronZoneImageSource, - zpools: &ZoneImageZpools<'_>, - boot_zpool: Option<&ZpoolName>, + internal_disks: InternalDisks, ) -> ZoneImageFileSource { let file_name = match image_source { OmicronZoneImageSource::InstallDataset => { @@ -153,33 +114,17 @@ impl ResolverInner { // If the boot disk exists, look for the image in the "install" // dataset on the boot zpool. - if let Some(boot_zpool) = boot_zpool { - zone_image_paths.push( - boot_zpool - .dataset_mountpoint(zpools.root, INSTALL_DATASET), - ); + if let Some(path) = internal_disks.boot_disk_install_dataset() { + zone_image_paths.push(path); } zone_image_paths } OmicronZoneImageSource::Artifact { .. } => { - // Search both artifact datasets, but look on the boot disk first. - // This iterator starts with the zpool for the boot disk (if it - // exists), and then is followed by all other zpools. - let zpool_iter = boot_zpool.into_iter().chain( - zpools - .all_m2_zpools - .iter() - .filter(|zpool| Some(zpool) != boot_zpool.as_ref()), - ); - zpool_iter - .map(|zpool| { - zpool.dataset_mountpoint( - zpools.root, - M2_ARTIFACT_DATASET, - ) - }) - .collect() + // Search both artifact datasets. This iterator starts with the + // dataset for the boot disk (if it exists), and then is followed + // by all other disks. + internal_disks.all_artifact_datasets().collect() } };
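
`file_source_for` above ultimately produces a file name plus an ordered list of directories to search, with the boot disk's dataset first. As a minimal sketch of how such a search list could be consumed by a caller, first existing path wins (this shows the general idea only; the file name and directories below are made up):

```rust
use std::path::PathBuf;

/// Given a file name and an ordered list of candidate directories (boot-disk
/// datasets first), return the first path at which the file actually exists.
fn resolve(file_name: &str, search_paths: &[PathBuf]) -> Option<PathBuf> {
    search_paths
        .iter()
        .map(|dir| dir.join(file_name))
        .find(|candidate| candidate.exists())
}

fn main() -> std::io::Result<()> {
    // Build two candidate directories and place the "image" only in the
    // second, mimicking a zone image absent from the preferred location.
    let root = std::env::temp_dir().join("zone-image-resolver-demo");
    let first = root.join("install-boot");
    let second = root.join("install-other");
    std::fs::create_dir_all(&first)?;
    std::fs::create_dir_all(&second)?;
    std::fs::write(second.join("zone.tar.gz"), b"fake image")?;

    let found = resolve("zone.tar.gz", &[first.clone(), second.clone()]);
    assert_eq!(found, Some(second.join("zone.tar.gz")));

    // Clean up the scratch directory.
    std::fs::remove_dir_all(&root)?;
    Ok(())
}
```
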