diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index b27f3048439..5eb8c3ecafe 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -67,7 +67,6 @@ progenitor::generate_api!( OmicronPhysicalDiskConfig = omicron_common::disk::OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig = omicron_common::disk::OmicronPhysicalDisksConfig, OmicronSledConfig = nexus_sled_agent_shared::inventory::OmicronSledConfig, - OmicronSledConfigResult = nexus_sled_agent_shared::inventory::OmicronSledConfigResult, OmicronZoneConfig = nexus_sled_agent_shared::inventory::OmicronZoneConfig, OmicronZoneDataset = nexus_sled_agent_shared::inventory::OmicronZoneDataset, OmicronZoneImageSource = nexus_sled_agent_shared::inventory::OmicronZoneImageSource, diff --git a/dev-tools/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs index d02203e052b..fbd6260bd8f 100644 --- a/dev-tools/omdb/src/bin/omdb/sled_agent.rs +++ b/dev-tools/omdb/src/bin/omdb/sled_agent.rs @@ -34,14 +34,6 @@ enum SledAgentCommands { #[clap(subcommand)] Zones(ZoneCommands), - /// print information about zpools - #[clap(subcommand)] - Zpools(ZpoolCommands), - - /// print information about datasets - #[clap(subcommand)] - Datasets(DatasetCommands), - /// print information about the local bootstore node #[clap(subcommand)] Bootstore(BootstoreCommands), @@ -97,12 +89,6 @@ impl SledAgentArgs { SledAgentCommands::Zones(ZoneCommands::List) => { cmd_zones_list(&client).await } - SledAgentCommands::Zpools(ZpoolCommands::List) => { - cmd_zpools_list(&client).await - } - SledAgentCommands::Datasets(DatasetCommands::List) => { - cmd_datasets_list(&client).await - } SledAgentCommands::Bootstore(BootstoreCommands::Status) => { cmd_bootstore_status(&client).await } @@ -129,44 +115,6 @@ async fn cmd_zones_list( Ok(()) } -/// Runs `omdb sled-agent zpools list` -async fn cmd_zpools_list( - client: &sled_agent_client::Client, -) -> Result<(), anyhow::Error> { - let response = client.zpools_get().await.context("listing zpools")?; - let zpools = response.into_inner(); - - println!("zpools:"); - if zpools.is_empty() { - println!(" "); - } - for zpool in &zpools { - println!(" {:?}", zpool); - } - - Ok(()) -} - -/// Runs `omdb sled-agent datasets list` -async fn cmd_datasets_list( - client: &sled_agent_client::Client, -) -> Result<(), anyhow::Error> { - let response = client.datasets_get().await.context("listing datasets")?; - let response = response.into_inner(); - - println!("dataset configuration @ generation {}:", response.generation); - let datasets = response.datasets; - - if datasets.is_empty() { - println!(" "); - } - for dataset in &datasets { - println!(" {:?}", dataset); - } - - Ok(()) -} - /// Runs `omdb sled-agent bootstore status` async fn cmd_bootstore_status( client: &sled_agent_client::Client, diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 70672f68df8..e6ad3b7449f 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -14,10 +14,7 @@ use omicron_common::{ external::{ByteCount, Generation}, internal::shared::{NetworkInterface, SourceNatConfig}, }, - disk::{ - DatasetConfig, DatasetManagementStatus, DiskManagementStatus, - DiskVariant, OmicronPhysicalDiskConfig, - }, + disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, zpool_name::ZpoolName, }; use omicron_uuid_kinds::{DatasetUuid, OmicronZoneUuid}; @@ -130,8 +127,6 @@ pub enum SledRole { } 
/// Describes the set of Reconfigurator-managed configuration elements of a sled -// TODO this struct should have a generation number; at the moment, each of -// the fields has a separete one internally. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] pub struct OmicronSledConfig { pub generation: Generation, @@ -140,14 +135,6 @@ pub struct OmicronSledConfig { pub zones: IdMap, } -/// Result of the currently-synchronous `omicron_config_put` endpoint. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"] -pub struct OmicronSledConfigResult { - pub disks: Vec, - pub datasets: Vec, -} - /// Describes the set of Omicron-managed zones running on a sled #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, diff --git a/nexus/reconfigurator/execution/src/omicron_sled_config.rs b/nexus/reconfigurator/execution/src/omicron_sled_config.rs index 64e8fdc3088..cfd2bada1a0 100644 --- a/nexus/reconfigurator/execution/src/omicron_sled_config.rs +++ b/nexus/reconfigurator/execution/src/omicron_sled_config.rs @@ -10,15 +10,13 @@ use anyhow::anyhow; use futures::StreamExt; use futures::stream; use nexus_db_queries::context::OpContext; -use nexus_sled_agent_shared::inventory::OmicronSledConfigResult; use nexus_types::deployment::BlueprintSledConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; -use slog::Logger; use slog::info; use slog::warn; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; -use update_engine::merge_anyhow_list; /// Idempotently ensure that the specified Omicron sled configs are deployed to /// the corresponding sleds @@ -63,13 +61,14 @@ pub(crate) async fn deploy_sled_configs( format!("Failed to put {config:#?} to sled {sled_id}") }); match result { + Ok(_) => None, Err(error) => { - warn!(log, "{error:#}"); + warn!( + log, "failed to put sled config"; + InlineErrorChain::new(error.as_ref()), + ); Some(error) } - Ok(result) => { - parse_config_result(result.into_inner(), &log).err() - } } }) .collect() @@ -78,69 +77,6 @@ pub(crate) async fn deploy_sled_configs( if errors.is_empty() { Ok(()) } else { Err(errors) } } -fn parse_config_result( - result: OmicronSledConfigResult, - log: &Logger, -) -> anyhow::Result<()> { - let (disk_errs, disk_successes): (Vec<_>, Vec<_>) = - result.disks.into_iter().partition(|status| status.err.is_some()); - - if !disk_errs.is_empty() { - warn!( - log, - "Failed to deploy disks for sled agent"; - "successfully configured disks" => disk_successes.len(), - "failed disk configurations" => disk_errs.len(), - ); - for err in &disk_errs { - warn!(log, "{err:?}"); - } - return Err(merge_anyhow_list(disk_errs.into_iter().map(|status| { - anyhow!( - "failed to deploy disk {:?}: {:#}", - status.identity, - // `disk_errs` was partitioned by `status.err.is_some()`, so - // this is safe to unwrap. 
- status.err.unwrap(), - ) - }))); - } - - let (dataset_errs, dataset_successes): (Vec<_>, Vec<_>) = - result.datasets.into_iter().partition(|status| status.err.is_some()); - - if !dataset_errs.is_empty() { - warn!( - log, - "Failed to deploy datasets for sled agent"; - "successfully configured datasets" => dataset_successes.len(), - "failed dataset configurations" => dataset_errs.len(), - ); - for err in &dataset_errs { - warn!(log, "{err:?}"); - } - return Err(merge_anyhow_list(dataset_errs.into_iter().map( - |status| { - anyhow!( - "failed to deploy dataset {}: {:#}", - status.dataset_name.full_name(), - // `dataset_errs` was partitioned by `status.err.is_some()`, - // so this is safe to unwrap. - status.err.unwrap(), - ) - }, - ))); - } - - info!( - log, - "Successfully deployed config to sled agent"; - "successfully configured disks" => disk_successes.len(), - "successfully configured datasets" => dataset_successes.len(), - ); - Ok(()) -} - #[cfg(test)] mod tests { use super::*; @@ -326,6 +262,9 @@ mod tests { // Observe the latest configuration stored on the simulated sled agent, // and verify that this output matches the input. + // + // TODO-cleanup Simulated sled-agent should report a unified + // `OmicronSledConfig`. let observed_disks = sim_sled_agent.omicron_physical_disks_list().unwrap(); let observed_datasets = sim_sled_agent.datasets_config_list().unwrap(); diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 538884fb45d..c38682f59b9 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -14,7 +14,6 @@ use crate::planner::rng::PlannerRng; use crate::system::SledBuilder; use crate::system::SystemDescription; use nexus_inventory::CollectionBuilderRng; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; @@ -486,15 +485,7 @@ impl ExampleSystemBuilder { for (sled_id, sled_cfg) in &blueprint.sleds { let sled_cfg = sled_cfg.clone().into_in_service_sled_config(); - system - .sled_set_omicron_zones( - *sled_id, - OmicronZonesConfig { - generation: sled_cfg.generation, - zones: sled_cfg.zones.into_iter().collect(), - }, - ) - .unwrap(); + system.sled_set_omicron_config(*sled_id, sled_cfg).unwrap(); } // We just ensured that a handful of datasets should exist in diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index d863438bab1..8f0bc20d5b3 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -996,7 +996,6 @@ pub(crate) mod test { use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use expectorate::assert_contents; - use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_types::deployment::BlueprintDatasetDisposition; use nexus_types::deployment::BlueprintDiffSummary; use nexus_types::deployment::BlueprintPhysicalDiskDisposition; @@ -1181,18 +1180,15 @@ pub(crate) mod test { // example.collection -- this should be addressed via API improvements. 
example .system - .sled_set_omicron_zones(new_sled_id, { - let sled_cfg = blueprint4 + .sled_set_omicron_config( + new_sled_id, + blueprint4 .sleds .get(&new_sled_id) .expect("blueprint should contain zones for new sled") .clone() - .into_in_service_sled_config(); - OmicronZonesConfig { - generation: sled_cfg.generation, - zones: sled_cfg.zones.into_iter().collect(), - } - }) + .into_in_service_sled_config(), + ) .unwrap(); let collection = example.system.to_collection_builder().unwrap().build(); diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index ad389795f50..1b12b63470e 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -17,6 +17,7 @@ use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::ClickhousePolicy; @@ -353,7 +354,7 @@ impl SystemDescription { !self.sleds.is_empty() } - /// Set Omicron zones for a sled. + /// Set Omicron config for a sled. /// /// The zones will be reported in the collection generated by /// [`Self::to_collection_builder`]. @@ -362,17 +363,26 @@ impl SystemDescription { /// /// # Notes /// - /// It is okay to call `sled_set_omicron_zones` in ways that wouldn't + /// It is okay to call `sled_set_omicron_config` in ways that wouldn't /// happen in production, such as to test illegal states. - pub fn sled_set_omicron_zones( + pub fn sled_set_omicron_config( &mut self, sled_id: SledUuid, - omicron_zones: OmicronZonesConfig, + sled_config: OmicronSledConfig, ) -> anyhow::Result<&mut Self> { let sled = self.sleds.get_mut(&sled_id).with_context(|| { format!("attempted to access sled {} not found in system", sled_id) })?; - Arc::make_mut(sled).inventory_sled_agent.omicron_zones = omicron_zones; + let sled = Arc::make_mut(sled); + // TODO this inventory needs to be more robust for the reconciler; + // this is nontrivial so coming in a subsequent PR. For now just + // backfill the existing inventory structure. 
+ sled.inventory_sled_agent.omicron_zones = OmicronZonesConfig { + generation: sled_config.generation, + zones: sled_config.zones.into_iter().collect(), + }; + sled.inventory_sled_agent.omicron_physical_disks_generation = + sled_config.generation; Ok(self) } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index c3f1cbcee51..f95d3e2f5b6 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -334,30 +334,6 @@ } } }, - "/datasets": { - "get": { - "summary": "Lists the datasets that this sled is configured to use", - "operationId": "datasets_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DatasetsConfig" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", @@ -516,38 +492,8 @@ "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronSledConfigResult" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, - "/omicron-physical-disks": { - "get": { - "operationId": "omicron_physical_disks_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -2154,33 +2100,6 @@ } } } - }, - "/zpools": { - "get": { - "operationId": "zpools_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "title": "Array_of_Zpool", - "type": "array", - "items": { - "$ref": "#/components/schemas/Zpool" - } - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } } }, "components": { @@ -3647,22 +3566,6 @@ "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", "type": "string" }, - "DatasetManagementStatus": { - "description": "Identifies how a single dataset management operation may have succeeded or failed.", - "type": "object", - "properties": { - "dataset_name": { - "$ref": "#/components/schemas/DatasetName" - }, - "err": { - "nullable": true, - "type": "string" - } - }, - "required": [ - "dataset_name" - ] - }, "DatasetName": { "type": "object", "properties": { @@ -3678,29 +3581,6 @@ "pool_name" ] }, - "DatasetsConfig": { - "type": "object", - "properties": { - "datasets": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/DatasetConfig" - } - }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". 
This is the default configuration for a sled before any requests have been made.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - } - }, - "required": [ - "datasets", - "generation" - ] - }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -3777,110 +3657,6 @@ "vendor" ] }, - "DiskManagementError": { - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "not_found" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "zpool_uuid_mismatch" - ] - }, - "value": { - "type": "object", - "properties": { - "expected": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - }, - "observed": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - } - }, - "required": [ - "expected", - "observed" - ] - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "key_manager" - ] - }, - "value": { - "type": "string" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "other" - ] - }, - "value": { - "type": "string" - } - }, - "required": [ - "type", - "value" - ] - } - ] - }, - "DiskManagementStatus": { - "description": "Identifies how a single disk management operation may have succeeded or failed.", - "type": "object", - "properties": { - "err": { - "nullable": true, - "allOf": [ - { - "$ref": "#/components/schemas/DiskManagementError" - } - ] - }, - "identity": { - "$ref": "#/components/schemas/DiskIdentity" - } - }, - "required": [ - "identity" - ] - }, "DiskRuntimeState": { "description": "Runtime state of the Disk, which includes its attach state and some minimal metadata", "type": "object", @@ -4179,13 +3955,6 @@ } ] }, - "DiskType": { - "type": "string", - "enum": [ - "U2", - "M2" - ] - }, "DiskVariant": { "type": "string", "enum": [ @@ -5431,29 +5200,6 @@ "pool_id" ] }, - "OmicronPhysicalDisksConfig": { - "type": "object", - "properties": { - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" - } - }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). 
It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - } - }, - "required": [ - "disks", - "generation" - ] - }, "OmicronSledConfig": { "description": "Describes the set of Reconfigurator-managed configuration elements of a sled", "type": "object", @@ -5478,28 +5224,6 @@ "zones" ] }, - "OmicronSledConfigResult": { - "description": "Result of the currently-synchronous `omicron_config_put` endpoint.", - "type": "object", - "properties": { - "datasets": { - "type": "array", - "items": { - "$ref": "#/components/schemas/DatasetManagementStatus" - } - }, - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/DiskManagementStatus" - } - } - }, - "required": [ - "datasets", - "disks" - ] - }, "OmicronZoneConfig": { "description": "Describes one Omicron-managed zone running on a sled", "type": "object", @@ -7577,21 +7301,6 @@ "version" ] }, - "Zpool": { - "type": "object", - "properties": { - "disk_type": { - "$ref": "#/components/schemas/DiskType" - }, - "id": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - } - }, - "required": [ - "disk_type", - "id" - ] - }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 0f1af733b25..c406e4f5df3 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -138,6 +138,7 @@ tokio-stream.workspace = true tokio-util.workspace = true illumos-utils = { workspace = true, features = ["testing"] } +sled-agent-config-reconciler = { workspace = true, features = ["testing"] } sled-storage = { workspace = true, features = ["testing"] } # diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 6033650fa4e..a71f92276cc 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -13,7 +13,7 @@ use dropshot::{ TypedBody, }; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, OmicronSledConfigResult, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::{ api::external::Generation, @@ -24,7 +24,7 @@ use omicron_common::{ SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }, }, - disk::{DatasetsConfig, DiskVariant, OmicronPhysicalDisksConfig}, + disk::DiskVariant, ledger::Ledgerable, }; use omicron_uuid_kinds::{ @@ -260,32 +260,7 @@ pub trait SledAgentApi { async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError>; - - /// Lists the datasets that this sled is configured to use - #[endpoint { - method = GET, - path = "/datasets", - }] - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - - #[endpoint { - method = GET, - path = "/omicron-physical-disks", - }] - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - - #[endpoint { - method = GET, - path = "/zpools", - }] - async fn zpools_get( - rqctx: RequestContext, - ) -> Result>, HttpError>; + ) -> Result; #[endpoint { method = GET, diff --git a/sled-agent/src/artifact_store.rs b/sled-agent/src/artifact_store.rs index c2b6965a32a..e73461020d5 100644 --- a/sled-agent/src/artifact_store.rs +++ b/sled-agent/src/artifact_store.rs @@ -17,11 +17,11 @@ //! Operations that list or modify artifacts or the configuration are called by //! 
Nexus and handled by the Sled Agent API. -use std::collections::BTreeMap; use std::future::Future; use std::io::{ErrorKind, Write}; use std::net::SocketAddrV6; use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use atomicwrites::{AtomicFile, OverwriteBehavior}; @@ -40,8 +40,8 @@ use sha2::{Digest, Sha256}; use sled_agent_api::{ ArtifactConfig, ArtifactListResponse, ArtifactPutResponse, }; -use sled_storage::dataset::M2_ARTIFACT_DATASET; -use sled_storage::manager::StorageHandle; +use sled_agent_config_reconciler::ConfigReconcilerHandle; +use sled_agent_config_reconciler::InternalDisksReceiver; use slog::{Logger, error, info}; use slog_error_chain::{InlineErrorChain, SlogInlineError}; use tokio::fs::File; @@ -49,8 +49,6 @@ use tokio::sync::{OwnedSemaphorePermit, mpsc, oneshot, watch}; use tokio::task::JoinSet; use tufaceous_artifact::ArtifactHash; -use crate::services::ServiceManager; - // These paths are defined under the artifact storage dataset. They // cannot conflict with any artifact paths because all artifact paths are // hexadecimal-encoded SHA-256 checksums. @@ -93,7 +91,7 @@ impl ArtifactStore { pub(crate) async fn new( log: &Logger, storage: T, - services: Option, + config_reconciler: Option>, ) -> ArtifactStore { let log = log.new(slog::o!("component" => "ArtifactStore")); @@ -133,7 +131,7 @@ impl ArtifactStore { tokio::task::spawn(ledger_manager( log.clone(), ledger_paths, - services, + config_reconciler, ledger_rx, config_tx, )); @@ -164,13 +162,15 @@ impl ArtifactStore { } } -impl ArtifactStore { +impl ArtifactStore { pub(crate) async fn start( - self, + self: Arc, sled_address: SocketAddrV6, dropshot_config: &ConfigDropshot, - ) -> Result>, StartError> - { + ) -> Result< + dropshot::HttpServer>>, + StartError, + > { let mut depot_address = sled_address; depot_address.set_port(REPO_DEPOT_PORT); @@ -259,7 +259,7 @@ impl ArtifactStore { /// Open an artifact file by hash from a storage handle. /// /// This is the same as [ArtifactStore::get], but can be called with only - /// a [StorageHandle]. + /// a storage implementation. pub(crate) async fn get_from_storage( storage: &T, log: &Logger, @@ -452,11 +452,11 @@ type LedgerManagerRequest = async fn ledger_manager( log: Logger, ledger_paths: Vec, - services: Option, + config_reconciler: Option>, mut rx: mpsc::Receiver, config_channel: watch::Sender>, ) { - let services = services.as_ref(); + let config_reconciler = config_reconciler.as_ref(); let handle_request = async |new_config: ArtifactConfig| { if ledger_paths.is_empty() { return Err(Error::NoUpdateDataset); @@ -466,21 +466,11 @@ async fn ledger_manager( { if new_config.generation > ledger.data().generation { // New config generation. First check that the configuration - // contains all artifacts that are presently in use. - let mut missing = BTreeMap::new(); - // Check artifacts from the current zone configuration. - if let Some(services) = services { - for zone in services.omicron_zones_list().await.zones { - if let Some(hash) = zone.image_source.artifact_hash() { - if !new_config.artifacts.contains(&hash) { - missing - .insert(hash, "current zone configuration"); - } - } - } - } - if !missing.is_empty() { - return Err(Error::InUseArtifactsMissing(missing)); + // is valid against the current ledgered sled config. + if let Some(config_reconciler) = config_reconciler { + config_reconciler + .validate_artifact_config(new_config.clone()) + .await??; } // Everything looks okay; update the ledger. 
@@ -614,14 +604,11 @@ pub trait DatasetsManager: Clone + Send + Sync + 'static { } } -impl DatasetsManager for StorageHandle { +impl DatasetsManager for InternalDisksReceiver { async fn artifact_storage_paths( &self, ) -> impl Iterator + '_ { - self.get_latest_disks() - .await - .all_m2_mountpoints(M2_ARTIFACT_DATASET) - .into_iter() + self.current().all_artifact_datasets().collect::>().into_iter() } } @@ -766,11 +753,11 @@ impl ArtifactWriter { } /// Implementation of the Repo Depot API backed by an -/// `ArtifactStore`. +/// `ArtifactStore`. enum RepoDepotImpl {} impl RepoDepotApi for RepoDepotImpl { - type Context = ArtifactStore; + type Context = Arc>; async fn artifact_get_by_sha256( rqctx: RequestContext, @@ -831,8 +818,15 @@ pub enum Error { #[error("Digest mismatch: expected {expected}, actual {actual}")] HashMismatch { expected: ArtifactHash, actual: ArtifactHash }, - #[error("Artifacts in use are not present in new config: {0:?}")] - InUseArtifactsMissing(BTreeMap), + #[error("New config is invalid per current sled config")] + InvalidPerSledConfig( + #[from] sled_agent_config_reconciler::LedgerArtifactConfigError, + ), + + #[error("Cannot validate incoming config against sled config")] + CannotValidateAgainstSledConfig( + #[from] sled_agent_config_reconciler::LedgerTaskError, + ), #[error("Blocking task failed")] Join(#[source] tokio::task::JoinError), @@ -863,7 +857,7 @@ impl From for HttpError { match err { // 4xx errors Error::HashMismatch { .. } - | Error::InUseArtifactsMissing { .. } + | Error::InvalidPerSledConfig { .. } | Error::NoConfig | Error::NotInConfig { .. } => { HttpError::for_bad_request(None, err.to_string()) @@ -894,9 +888,12 @@ impl From for HttpError { | Error::File { .. } | Error::Join(_) | Error::LedgerCommit(_) - | Error::LedgerChannel => HttpError::for_internal_error( - InlineErrorChain::new(&err).to_string(), - ), + | Error::LedgerChannel + | Error::CannotValidateAgainstSledConfig(_) => { + HttpError::for_internal_error( + InlineErrorChain::new(&err).to_string(), + ) + } } } } diff --git a/sled-agent/src/bootstrap/bootstore_setup.rs b/sled-agent/src/bootstrap/bootstore_setup.rs index 1b148290104..c9e3cefbd8e 100644 --- a/sled-agent/src/bootstrap/bootstore_setup.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -15,7 +15,6 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CLUSTER_DATASET; -use sled_storage::resources::AllDisks; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; @@ -26,7 +25,7 @@ const BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; pub fn new_bootstore_config( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], baseboard: Baseboard, global_zone_bootstrap_ip: Ipv6Addr, ) -> Result { @@ -37,19 +36,20 @@ pub fn new_bootstore_config( learn_timeout: Duration::from_secs(5), rack_init_timeout: Duration::from_secs(300), rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths(&all_disks)?, + fsm_state_ledger_paths: bootstore_fsm_state_paths( + cluster_dataset_paths, + )?, network_config_ledger_paths: bootstore_network_config_paths( - &all_disks, + cluster_dataset_paths, )?, }) } fn bootstore_fsm_state_paths( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], ) -> Result, StartError> { - let paths: Vec<_> = all_disks - 
.all_m2_mountpoints(CLUSTER_DATASET) - .into_iter() + let paths: Vec<_> = cluster_dataset_paths + .iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) .collect(); @@ -60,11 +60,10 @@ fn bootstore_fsm_state_paths( } fn bootstore_network_config_paths( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], ) -> Result, StartError> { - let paths: Vec<_> = all_disks - .all_m2_mountpoints(CLUSTER_DATASET) - .into_iter() + let paths: Vec<_> = cluster_dataset_paths + .iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) .collect(); diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index 44bd09387ff..12a986cf2fb 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -23,10 +23,10 @@ use dropshot::{ use omicron_common::api::external::Error; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::RackOperationStatus; use sled_hardware_types::Baseboard; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; @@ -37,7 +37,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_manager: StorageHandle, + pub(crate) internal_disks_rx: InternalDisksReceiver, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -56,7 +56,7 @@ impl BootstrapServerContext { &self.base_log, self.sprockets.clone(), self.global_zone_bootstrap_ip, - &self.storage_manager, + &self.internal_disks_rx, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 35a8c1f3b86..561a7d08fc6 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -20,7 +20,6 @@ use crate::long_running_tasks::{ LongRunningTaskHandles, spawn_all_longrunning_tasks, }; use crate::services::ServiceManager; -use crate::services::TimeSyncConfig; use crate::sled_agent::SledAgent; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; @@ -36,6 +35,7 @@ use illumos_utils::zone::Api; use illumos_utils::zone::Zones; use omicron_common::FileKv; use omicron_common::address::Ipv6Subnet; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_hardware::DendriteAsic; use sled_hardware::SledMode; use sled_hardware::underlay; @@ -54,6 +54,7 @@ pub(super) struct BootstrapAgentStartup { pub(super) service_manager: ServiceManager, pub(super) long_running_task_handles: LongRunningTaskHandles, pub(super) sled_agent_started_tx: oneshot::Sender, + pub(super) config_reconciler_spawn_token: ConfigReconcilerSpawnToken, } impl BootstrapAgentStartup { @@ -119,6 +120,7 @@ impl BootstrapAgentStartup { // the process and are used by both the bootstrap agent and sled agent let ( long_running_task_handles, + config_reconciler_spawn_token, sled_agent_started_tx, service_manager_ready_tx, ) = spawn_all_longrunning_tasks( @@ -132,22 +134,17 @@ impl BootstrapAgentStartup { let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; - let time_sync = if let Some(true) = config.skip_timesync { - TimeSyncConfig::Skip - } else { - TimeSyncConfig::Normal - }; - let 
service_manager = ServiceManager::new( &base_log, ddm_reconciler, startup_networking, sled_mode, - time_sync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), - long_running_task_handles.storage_manager.clone(), - long_running_task_handles.zone_bundler.clone(), + long_running_task_handles + .config_reconciler + .internal_disks_rx() + .clone(), ); // Inform the hardware monitor that the service manager is ready @@ -165,6 +162,7 @@ impl BootstrapAgentStartup { service_manager, long_running_task_handles, sled_agent_started_tx, + config_reconciler_spawn_token, }) } } diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index 2be59fd5880..db2e79d3ec9 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -9,9 +9,9 @@ use crate::rack_setup::service::SetupServiceError; use bootstore::schemes::v0 as bootstore; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::{RackOperationStatus, RssStep}; -use sled_storage::manager::StorageHandle; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::mem; @@ -146,7 +146,7 @@ impl RssAccess { parent_log: &Logger, sprockets: SprocketsConfig, global_zone_bootstrap_ip: Ipv6Addr, - storage_manager: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result { @@ -182,7 +182,7 @@ impl RssAccess { *status = RssStatus::Initializing { id, completion, step_rx }; mem::drop(status); let parent_log = parent_log.clone(); - let storage_manager = storage_manager.clone(); + let internal_disks_rx = internal_disks_rx.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { @@ -190,7 +190,7 @@ impl RssAccess { &parent_log, sprockets, global_zone_bootstrap_ip, - storage_manager, + internal_disks_rx, bootstore_node_handle, request, step_tx, @@ -328,7 +328,7 @@ async fn rack_initialize( parent_log: &Logger, sprockets: SprocketsConfig, global_zone_bootstrap_ip: Ipv6Addr, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, step_tx: watch::Sender, @@ -338,7 +338,7 @@ async fn rack_initialize( sprockets, request, global_zone_bootstrap_ip, - storage_manager, + internal_disks_rx, bootstore_node_handle, step_tx, ) diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index efddfa2aa25..f3872b90feb 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -14,10 +14,10 @@ use futures::stream::FuturesUnordered; use omicron_common::backoff::BackoffError; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::RssStep; use sled_agent_types::sled::StartSledAgentRequest; -use sled_storage::manager::StorageHandle; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::net::Ipv6Addr; @@ -50,7 +50,7 @@ impl RssHandle { sprockets: SprocketsConfig, config: RackInitializeRequest, our_bootstrap_address: Ipv6Addr, - storage_manager: StorageHandle, + internal_disks_rx: 
InternalDisksReceiver, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, ) -> Result<(), SetupServiceError> { @@ -59,7 +59,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_manager, + internal_disks_rx, tx, bootstore, step_tx, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 761a39ccbbf..5fa09ea0f41 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -39,11 +39,12 @@ use omicron_ddm_admin_client::DdmError; use omicron_ddm_admin_client::types::EnableStatsRequest; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::RackInitUuid; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::sled::StartSledAgentRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::io; use std::net::SocketAddr; @@ -180,12 +181,15 @@ impl Server { service_manager, long_running_task_handles, sled_agent_started_tx, + config_reconciler_spawn_token, } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? - let paths = - sled_config_paths(&long_running_task_handles.storage_manager) - .await?; + let internal_disks_rx = long_running_task_handles + .config_reconciler + .internal_disks_rx() + .clone(); + let paths = sled_config_paths(&internal_disks_rx).await?; let maybe_ledger = Ledger::::new(&startup_log, paths).await; @@ -204,7 +208,7 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_manager: long_running_task_handles.storage_manager.clone(), + internal_disks_rx, bootstore_node_handle: long_running_task_handles.bootstore.clone(), baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, @@ -244,6 +248,7 @@ impl Server { &config, start_sled_agent_request, long_running_task_handles.clone(), + config_reconciler_spawn_token, service_manager.clone(), &base_log, &startup_log, @@ -257,14 +262,12 @@ impl Server { .map_err(|_| ()) .expect("Failed to send to StorageMonitor"); - // For cold boot specifically, we now need to load the services - // we're responsible for, while continuing to handle hardware - // notifications. This cannot fail: we retry indefinitely until - // we're done loading services. - sled_agent.load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) + SledAgentState::Bootstrapping(Some(BootstrappingDependencies { + sled_agent_started_tx, + config_reconciler_spawn_token, + })) }; // Spawn our inner task that handles any future hardware updates and any @@ -306,11 +309,16 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping(Option>), + Bootstrapping(Option), // ... or the sled agent server is running. 
ServerStarted(SledAgentServer), } +struct BootstrappingDependencies { + sled_agent_started_tx: oneshot::Sender, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, +} + #[derive(thiserror::Error, Debug)] pub enum SledAgentServerStartError { #[error("Failed to start sled-agent server: {0}")] @@ -350,6 +358,7 @@ async fn start_sled_agent( config: &SledConfig, request: StartSledAgentRequest, long_running_task_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, service_manager: ServiceManager, base_log: &Logger, log: &Logger, @@ -386,9 +395,6 @@ async fn start_sled_agent( } } - // Inform the storage service that the key manager is available - long_running_task_handles.storage_manager.key_manager_ready().await; - // Inform our DDM reconciler of our underlay subnet and the information it // needs for maghemite to enable Oximeter stats. let ddm_reconciler = service_manager.ddm_reconciler(); @@ -404,6 +410,7 @@ async fn start_sled_agent( base_log.clone(), request.clone(), long_running_task_handles.clone(), + config_reconciler_spawn_token, service_manager, ) .await @@ -413,8 +420,10 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = - sled_config_paths(&long_running_task_handles.storage_manager).await?; + let paths = sled_config_paths( + long_running_task_handles.config_reconciler.internal_disks_rx(), + ) + .await?; let mut ledger = Ledger::new_with(&log, paths, request); ledger.commit().await?; @@ -468,12 +477,11 @@ impl From for SledAgentServerStartError { } async fn sled_config_paths( - storage: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, ) -> Result, MissingM2Paths> { - let resources = storage.get_latest_disks().await; - let paths: Vec<_> = resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let paths: Vec<_> = internal_disks_rx + .current() + .all_config_datasets() .map(|p| p.join(SLED_AGENT_REQUEST_FILE)) .collect(); @@ -536,7 +544,7 @@ impl Inner { log: &Logger, ) { match &mut self.state { - SledAgentState::Bootstrapping(sled_agent_started_tx) => { + SledAgentState::Bootstrapping(deps) => { let request_id = request.body.id.into_untyped_uuid(); // Extract from options to satisfy the borrow checker. @@ -545,13 +553,16 @@ impl Inner { // we explicitly unwrap here, and panic on error below. // // See https://github.com/oxidecomputer/omicron/issues/4494 - let sled_agent_started_tx = - sled_agent_started_tx.take().unwrap(); + let BootstrappingDependencies { + sled_agent_started_tx, + config_reconciler_spawn_token, + } = deps.take().unwrap(); let response = match start_sled_agent( &self.config, request, self.long_running_task_handles.clone(), + config_reconciler_spawn_token, self.service_manager.clone(), &self.base_log, &log, @@ -620,15 +631,13 @@ impl Inner { } async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { - let config_dirs = self + let internal_disks = self .long_running_task_handles - .storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter(); + .config_reconciler + .internal_disks_rx() + .current(); - for dir in config_dirs { + for dir in internal_disks.all_config_datasets() { for entry in dir.read_dir_utf8().map_err(|err| { BootstrapError::Io { message: format!("Deleting {dir}"), err } })? 
{ diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 9508a11bfba..0e8cd00463d 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -8,10 +8,10 @@ use crate::services::ServiceManager; use crate::sled_agent::SledAgent; +use sled_agent_config_reconciler::RawDisksSender; use sled_hardware::{HardwareManager, HardwareUpdate}; use sled_hardware_types::Baseboard; use sled_storage::disk::RawDisk; -use sled_storage::manager::StorageHandle; use slog::Logger; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, oneshot}; @@ -68,8 +68,8 @@ pub struct HardwareMonitor { // A reference to the hardware manager hardware_manager: HardwareManager, - // A handle to [`sled_hardware::manager::StorageManger`] - storage_manager: StorageHandle, + // A handle to send raw disk updates to the config-reconciler system. + raw_disks_tx: RawDisksSender, // A handle to the sled-agent // @@ -91,7 +91,7 @@ impl HardwareMonitor { pub fn new( log: &Logger, hardware_manager: &HardwareManager, - storage_manager: &StorageHandle, + raw_disks_tx: RawDisksSender, ) -> ( HardwareMonitor, oneshot::Sender, @@ -112,7 +112,7 @@ impl HardwareMonitor { service_manager_ready_rx, hardware_rx, hardware_manager: hardware_manager.clone(), - storage_manager: storage_manager.clone(), + raw_disks_tx, sled_agent: None, tofino_manager, }, @@ -177,36 +177,16 @@ impl HardwareMonitor { } } HardwareUpdate::DiskAdded(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - // - // Here and below, we're "dropping a future" rather than - // awaiting it. That's intentional - the hardware monitor - // doesn't care when this work is finished, just when it's - // enqueued. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk(disk.into()) - .await; + self.raw_disks_tx + .add_or_update_raw_disk(disk.into(), &self.log); } HardwareUpdate::DiskRemoved(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk_removal(disk.into()) - .await; + self.raw_disks_tx + .remove_raw_disk(disk.identity(), &self.log); } HardwareUpdate::DiskUpdated(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk_update(disk.into()) - .await; + self.raw_disks_tx + .add_or_update_raw_disk(disk.into(), &self.log); } }, Err(broadcast::error::RecvError::Lagged(count)) => { @@ -280,14 +260,9 @@ impl HardwareMonitor { self.deactivate_switch().await; } - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. 
- #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .ensure_using_exactly_these_disks( - self.hardware_manager.disks().into_values().map(RawDisk::from), - ) - .await; + self.raw_disks_tx.set_raw_disks( + self.hardware_manager.disks().into_values().map(RawDisk::from), + &self.log, + ); } } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 9abd2dfb5c2..e644dec0315 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -18,7 +18,7 @@ use dropshot::{ Query, RequestContext, StreamingBody, TypedBody, }; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, OmicronSledConfigResult, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{DiskRuntimeState, SledVmmState}; @@ -26,9 +26,6 @@ use omicron_common::api::internal::shared::{ ExternalIpGatewayMap, ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }; -use omicron_common::disk::{ - DatasetsConfig, DiskVariant, M2Slot, OmicronPhysicalDisksConfig, -}; use range_requests::RequestContextEx; use sled_agent_api::*; use sled_agent_types::boot_disk::{ @@ -400,13 +397,6 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseDeleted()) } - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list().await?)) - } - async fn zone_bundle_cleanup( rqctx: RequestContext, ) -> Result>, HttpError> @@ -428,27 +418,11 @@ impl SledAgentApi for SledAgentImpl { async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - sa.set_omicron_config(body_args) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) - } - - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) - } - - async fn zpools_get( - rqctx: RequestContext, - ) -> Result>, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await)) + sa.set_omicron_config(body_args).await??; + Ok(HttpResponseUpdatedNoContent()) } async fn sled_role_get( @@ -792,29 +766,7 @@ impl SledAgentApi for SledAgentImpl { let boot_disk = path_params.into_inner().boot_disk; // Find our corresponding disk. - let maybe_disk_path = - sa.storage().get_latest_disks().await.iter_managed().find_map( - |(_identity, disk)| { - // Synthetic disks panic if asked for their `slot()`, so filter - // them out first; additionally, filter out any non-M2 disks. - if disk.is_synthetic() || disk.variant() != DiskVariant::M2 - { - return None; - } - - // Convert this M2 disk's slot to an M2Slot, and skip any that - // don't match the requested boot_disk. 
- let Ok(slot) = M2Slot::try_from(disk.slot()) else { - return None; - }; - if slot != boot_disk { - return None; - } - - let raw_devs_path = true; - Some(disk.boot_image_devfs_path(raw_devs_path)) - }, - ); + let maybe_disk_path = sa.boot_image_raw_devfs_path(boot_disk); let disk_path = match maybe_disk_path { Some(Ok(path)) => path, diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 161e477498a..95518804d51 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -36,10 +36,9 @@ use propolis_client::Client as PropolisClient; use propolis_client::instance_spec::{ComponentV0, SpecKey}; use rand::SeedableRng; use rand::prelude::IteratorRandom; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; use sled_agent_types::instance::*; use sled_agent_types::zone_bundle::ZoneBundleCause; -use sled_storage::dataset::ZONE_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::SocketAddr; @@ -530,8 +529,8 @@ struct InstanceRunner { // Connection to Nexus nexus_client: NexusClient, - // Storage resources - storage: StorageHandle, + // Available datasets for choosing zone roots + available_datasets_rx: AvailableDatasetsReceiver, // Used to create propolis zones zone_builder_factory: ZoneBuilderFactory, @@ -1556,10 +1555,10 @@ impl Instance { nexus_client, vnic_allocator, port_manager, - storage, zone_bundler, zone_builder_factory, metrics_queue, + available_datasets_rx, } = services; let mut dhcp_config = DhcpCfg { @@ -1645,7 +1644,7 @@ impl Instance { state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, - storage, + available_datasets_rx, zone_builder_factory, zone_bundler, metrics_queue, @@ -1991,13 +1990,10 @@ impl InstanceRunner { // configured VNICs. 
let zname = propolis_zone_name(&self.propolis_id); let mut rng = rand::rngs::StdRng::from_entropy(); - let latest_disks = self - .storage - .get_latest_disks() - .await - .all_u2_mountpoints(ZONE_DATASET); - let root = latest_disks + let root = self + .available_datasets_rx + .all_mounted_zone_root_datasets() .into_iter() .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)?; @@ -2275,11 +2271,16 @@ mod tests { use omicron_common::api::external::{Generation, Hostname}; use omicron_common::api::internal::nexus::VmmState; use omicron_common::api::internal::shared::{DhcpConfig, SledIdentifiers}; + use omicron_common::disk::DiskIdentity; + use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::{ InstanceMigrateStatusResponse, InstanceStateMonitorResponse, }; + use sled_agent_config_reconciler::{ + CurrentlyManagedZpoolsReceiver, InternalDisksReceiver, + }; use sled_agent_types::zone_bundle::CleanupContext; - use sled_storage::manager_test_harness::StorageManagerTestHarness; + use sled_storage::config::MountConfig; use std::net::SocketAddrV6; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV4}; use std::str::FromStr; @@ -2409,25 +2410,11 @@ mod tests { .expect("single-stepping mock server failed unexpectedly"); } - async fn setup_storage_manager(log: &Logger) -> StorageManagerTestHarness { - let mut harness = StorageManagerTestHarness::new(log).await; - let raw_disks = - harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; - harness.handle().key_manager_ready().await; - let config = harness.make_config(1, &raw_disks); - let _ = harness - .handle() - .omicron_physical_disks_ensure(config.clone()) - .await - .expect("Ensuring disks should work after key manager is ready"); - harness - } - async fn instance_struct( log: &Logger, propolis_addr: SocketAddr, nexus_client: NexusClient, - storage_handle: StorageHandle, + available_datasets_rx: AvailableDatasetsReceiver, temp_dir: &String, ) -> (Instance, MetricsRx) { let id = InstanceUuid::new_v4(); @@ -2439,7 +2426,7 @@ mod tests { let (services, rx) = fake_instance_manager_services( log, - storage_handle, + available_datasets_rx, nexus_client, temp_dir, ); @@ -2522,7 +2509,7 @@ mod tests { fn fake_instance_manager_services( log: &Logger, - storage_handle: StorageHandle, + available_datasets_rx: AvailableDatasetsReceiver, nexus_client: NexusClient, temp_dir: &String, ) -> (InstanceManagerServices, MetricsRx) { @@ -2539,7 +2526,19 @@ mod tests { let cleanup_context = CleanupContext::default(); let zone_bundler = ZoneBundler::new( log.new(o!("component" => "ZoneBundler")), - storage_handle.clone(), + InternalDisksReceiver::fake_static( + Arc::new(MountConfig::default()), + [( + DiskIdentity { + vendor: "test-vendor".to_string(), + model: "test-model".to_string(), + serial: "test-serial".to_string(), + }, + ZpoolName::new_external(ZpoolUuid::new_v4()), + )] + .into_iter(), + ), + available_datasets_rx.clone(), cleanup_context, ); @@ -2548,7 +2547,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage: storage_handle, + available_datasets_rx, zone_bundler, zone_builder_factory: ZoneBuilderFactory::fake( Some(temp_dir), @@ -2563,7 +2562,6 @@ mod tests { /// interactions with other parts of the system (e.g. Nexus and metrics). 
#[allow(dead_code)] struct InstanceTestObjects { - storage_harness: StorageManagerTestHarness, nexus: FakeNexusParts, _temp_guard: Utf8TempDir, instance_manager: crate::instance_manager::InstanceManager, @@ -2572,12 +2570,13 @@ mod tests { impl InstanceTestObjects { async fn new(log: &slog::Logger) -> Self { - let storage_harness = setup_storage_manager(log).await; let nexus = FakeNexusParts::new(&log).await; let temp_guard = Utf8TempDir::new().unwrap(); let (services, metrics_rx) = fake_instance_manager_services( log, - storage_harness.handle().clone(), + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), nexus.nexus_client.clone(), &temp_guard.path().to_string(), ); @@ -2586,7 +2585,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -2600,7 +2599,10 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + CurrentlyManagedZpoolsReceiver::fake_static( + std::iter::empty(), + ), + available_datasets_rx, zone_bundler, zone_builder_factory, vmm_reservoir_manager, @@ -2609,17 +2611,12 @@ mod tests { .unwrap(); Self { - storage_harness, nexus, _temp_guard: temp_guard, instance_manager, metrics_rx, } } - - async fn cleanup(mut self) { - self.storage_harness.cleanup().await; - } } #[tokio::test] @@ -2639,9 +2636,6 @@ mod tests { _nexus_server, } = FakeNexusParts::new(&log).await; - let mut storage_harness = setup_storage_manager(&log).await; - let storage_handle = storage_harness.handle().clone(); - let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); @@ -2651,7 +2645,9 @@ mod tests { &log, propolis_addr, nexus_client, - storage_handle, + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), &temp_dir, ), ) @@ -2706,7 +2702,6 @@ mod tests { .try_recv() .expect_err("The metrics request queue should have one message"); - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2725,9 +2720,6 @@ mod tests { _nexus_server, } = FakeNexusParts::new(&log).await; - let mut storage_harness = setup_storage_manager(&logctx.log).await; - let storage_handle = storage_harness.handle().clone(); - let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); @@ -2738,7 +2730,9 @@ mod tests { // we want to test propolis not ever coming up SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 1, 0, 0)), nexus_client, - storage_handle, + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), &temp_dir, ), ) @@ -2774,7 +2768,6 @@ mod tests { ); } - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2873,7 +2866,6 @@ mod tests { .try_recv() .expect_err("The metrics request queue should have one message"); - test_objects.cleanup().await; logctx.cleanup_successful(); } @@ -3019,7 +3011,6 @@ mod tests { .expect("timed out waiting for VmmState::Stopped in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); - test_objects.cleanup().await; logctx.cleanup_successful(); } @@ -3066,7 +3057,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -3101,7 +3092,7 @@ mod tests { state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, - storage, + available_datasets_rx, zone_builder_factory, zone_bundler, metrics_queue, @@ -3124,12 +3115,10 @@ mod tests { // them directly. 
_nexus_server: HttpServer, _dns_server: TransientServer, - storage_harness: StorageManagerTestHarness, } impl TestInstanceRunner { async fn new(log: &slog::Logger) -> Self { - let storage_harness = setup_storage_manager(&log).await; let FakeNexusParts { nexus_client, _nexus_server, @@ -3140,7 +3129,9 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let (services, _metrics_rx) = fake_instance_manager_services( &log, - storage_harness.handle().clone(), + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), nexus_client, &temp_guard.path().to_string(), ); @@ -3181,7 +3172,6 @@ mod tests { remove_rx, _nexus_server, _dns_server, - storage_harness, } } } @@ -3202,7 +3192,6 @@ mod tests { mut remove_rx, _nexus_server, _dns_server, - mut storage_harness, } = TestInstanceRunner::new(&log).await; let (resp_tx, resp_rx) = oneshot::channel(); @@ -3256,7 +3245,6 @@ mod tests { drop(terminate_tx); let _ = runner_task.await; - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -3276,7 +3264,6 @@ mod tests { mut remove_rx, _nexus_server, _dns_server, - mut storage_harness, } = TestInstanceRunner::new(&log).await; let (resp_tx, resp_rx) = oneshot::channel(); @@ -3299,7 +3286,6 @@ mod tests { }; assert_eq!(state.vmm_state.state, VmmState::Failed); - storage_harness.cleanup().await; logctx.cleanup_successful(); } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 9bd0ad76497..fa8a11c89d8 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -17,15 +17,14 @@ use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; use illumos_utils::running_zone::ZoneBuilderFactory; use omicron_common::api::external::ByteCount; -use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::PropolisUuid; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; +use sled_agent_config_reconciler::CurrentlyManagedZpoolsReceiver; use sled_agent_types::instance::*; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; use slog::Logger; -use std::collections::{BTreeMap, HashSet}; +use std::collections::BTreeMap; use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; @@ -66,10 +65,10 @@ pub(crate) struct InstanceManagerServices { pub nexus_client: NexusClient, pub vnic_allocator: VnicAllocator, pub port_manager: PortManager, - pub storage: StorageHandle, pub zone_bundler: ZoneBundler, pub zone_builder_factory: ZoneBuilderFactory, pub metrics_queue: MetricsRequestQueue, + pub available_datasets_rx: AvailableDatasetsReceiver, } // Describes the internals of the "InstanceManager", though most of the @@ -95,7 +94,8 @@ impl InstanceManager { nexus_client: NexusClient, vnic_allocator: VnicAllocator, port_manager: PortManager, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, vmm_reservoir_manager: VmmReservoirManagerHandle, metrics_queue: MetricsRequestQueue, @@ -105,7 +105,8 @@ impl InstanceManager { nexus_client, vnic_allocator, port_manager, - storage, + currently_managed_zpools_rx, + available_datasets_rx, zone_bundler, ZoneBuilderFactory::new(), vmm_reservoir_manager, @@ -123,7 +124,8 @@ impl InstanceManager { nexus_client: NexusClient, vnic_allocator: VnicAllocator, port_manager: 
PortManager, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, vmm_reservoir_manager: VmmReservoirManagerHandle, @@ -142,8 +144,8 @@ impl InstanceManager { jobs: BTreeMap::new(), vnic_allocator, port_manager, - storage_generation: None, - storage, + currently_managed_zpools_rx, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -315,23 +317,6 @@ impl InstanceManager { .map_err(|_| Error::FailedSendInstanceManagerClosed)?; rx.await? } - - /// Marks instances failed unless they're using storage from `disks`. - /// - /// This function looks for transient zone filesystem usage on expunged - /// zpools. - pub async fn use_only_these_disks( - &self, - disks: AllDisks, - ) -> Result<(), Error> { - let (tx, rx) = oneshot::channel(); - self.inner - .tx - .send(InstanceManagerRequest::OnlyUseDisks { disks, tx }) - .await - .map_err(|_| Error::FailedSendInstanceManagerClosed)?; - rx.await? - } } // Most requests that can be sent to the "InstanceManagerRunner" task. @@ -386,10 +371,6 @@ enum InstanceManagerRequest { propolis_id: PropolisUuid, tx: oneshot::Sender>, }, - OnlyUseDisks { - disks: AllDisks, - tx: oneshot::Sender>, - }, } // Requests that the instance manager stop processing information about a @@ -426,8 +407,8 @@ struct InstanceManagerRunner { vnic_allocator: VnicAllocator, port_manager: PortManager, - storage_generation: Option, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, metrics_queue: MetricsRequestQueue, @@ -458,6 +439,24 @@ impl InstanceManagerRunner { }, } }, + // If the set of currently-managed zpools has changed, shut down + // any instances due to disks that have disappeared out from + // under them. + result = self.currently_managed_zpools_rx.changed() => { + match result { + Ok(()) => { + self.use_only_currently_managed_zpools().await; + } + Err(_) => { + warn!( + self.log, + "InstanceManager's 'current zpools' channel \ + closed; shutting down", + ); + break; + } + } + } request = self.rx.recv() => { let request_variant = request.as_ref().map(|r| r.to_string()); let result = match request { @@ -495,10 +494,6 @@ impl InstanceManagerRunner { // the state... self.get_instance_state(tx, propolis_id) }, - Some(OnlyUseDisks { disks, tx } ) => { - self.use_only_these_disks(disks).await; - tx.send(Ok(())).map_err(|_| Error::FailedSendClientClosed) - }, None => { warn!(self.log, "InstanceManager's request channel closed; shutting down"); break; @@ -607,10 +602,10 @@ impl InstanceManagerRunner { nexus_client: self.nexus_client.clone(), vnic_allocator: self.vnic_allocator.clone(), port_manager: self.port_manager.clone(), - storage: self.storage.clone(), zone_bundler: self.zone_bundler.clone(), zone_builder_factory: self.zone_builder_factory.clone(), metrics_queue: self.metrics_queue.clone(), + available_datasets_rx: self.available_datasets_rx.clone(), }; let state = crate::instance::InstanceInitialState { @@ -760,24 +755,9 @@ impl InstanceManagerRunner { Ok(()) } - async fn use_only_these_disks(&mut self, disks: AllDisks) { - // Consider the generation number on the incoming request to avoid - // applying old requests. 
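An aside on the pattern above: the explicit, generation-guarded `OnlyUseDisks` request is being replaced by a watch-style receiver that the runner polls next to its request channel. Below is a minimal, self-contained sketch of that shape using only `tokio` primitives; `PoolName`, `Job`, and the shutdown policy are illustrative stand-ins, not the sled-agent's real types (which come from `sled-agent-config-reconciler`).

use std::collections::{BTreeMap, BTreeSet};
use tokio::sync::{mpsc, watch};

type PoolName = String; // stand-in for a zpool name
struct Job {
    pool: Option<PoolName>, // None models a ramdisk-backed job
}

async fn runner(
    mut requests: mpsc::Receiver<String>,
    mut managed_pools: watch::Receiver<BTreeSet<PoolName>>,
    mut jobs: BTreeMap<u64, Job>,
) {
    loop {
        tokio::select! {
            // Watch channels coalesce intermediate updates, so there is no
            // need to compare generation numbers to discard stale requests.
            changed = managed_pools.changed() => {
                if changed.is_err() {
                    break; // sender dropped; no further updates will arrive
                }
                let current = managed_pools.borrow_and_update().clone();
                jobs.retain(|_id, job| match &job.pool {
                    Some(pool) => current.contains(pool),
                    None => true,
                });
            }
            request = requests.recv() => {
                match request {
                    Some(_request) => { /* handle the request */ }
                    None => break, // request channel closed; shut down
                }
            }
        }
    }
}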
- let requested_generation = *disks.generation(); - if let Some(last_gen) = self.storage_generation { - if last_gen >= requested_generation { - // This request looks old, ignore it. - info!(self.log, "use_only_these_disks: Ignoring request"; - "last_gen" => ?last_gen, "requested_gen" => ?requested_generation); - return; - } - } - self.storage_generation = Some(requested_generation); - info!(self.log, "use_only_these_disks: Processing new request"; - "gen" => ?requested_generation); - - let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); - + async fn use_only_currently_managed_zpools(&mut self) { + let current_zpools = + self.currently_managed_zpools_rx.current_and_update(); let mut to_remove = vec![]; for (id, instance) in self.jobs.iter() { // If we can read the filesystem pool, consider it. Otherwise, move @@ -792,7 +772,7 @@ impl InstanceManagerRunner { info!(self.log, "use_only_these_disks: Cannot read filesystem pool"; "instance_id" => ?id); continue; }; - if !u2_set.contains(&filesystem_pool) { + if !current_zpools.contains(&filesystem_pool) { to_remove.push(*id); } } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 3a63ff5c117..97d1493467d 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -35,7 +35,6 @@ pub mod rack_setup; pub mod server; pub mod services; mod sled_agent; -mod storage_monitor; mod support_bundle; mod swap_device; mod updates; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 6475eebad0d..ae533faae1d 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -20,36 +20,29 @@ use crate::config::Config; use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; use crate::zone_bundle::ZoneBundler; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; +use sled_agent_config_reconciler::{ + ConfigReconcilerHandle, ConfigReconcilerSpawnToken, RawDisksSender, + TimeSyncConfig, +}; use sled_agent_types::zone_bundle::CleanupContext; use sled_hardware::{HardwareManager, SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; -use sled_storage::manager::{StorageHandle, StorageManager}; use slog::{Logger, info}; use std::net::Ipv6Addr; +use std::sync::Arc; use tokio::sync::oneshot; /// A mechanism for interacting with all long running tasks that can be shared /// between the bootstrap-agent and sled-agent code. #[derive(Clone)] pub struct LongRunningTaskHandles { - /// A mechanism for retrieving storage keys. This interacts with the - /// [`KeyManager`] task. In the future, there may be other handles for - /// retrieving different types of keys. Separating the handles limits the - /// access for a given key type to the code that holds the handle. - pub storage_key_requester: StorageKeyRequester, - - /// A mechanism for talking to the [`StorageManager`] which is responsible - /// for establishing zpools on disks and managing their datasets. - pub storage_manager: StorageHandle, - - /// A mechanism for talking to the [`StorageMonitor`], which reacts to disk - /// changes and updates the dump devices. - pub storage_monitor_handle: StorageMonitorHandle, + /// A handle to the set of tasks managed by the sled-agent-config-reconciler + /// system. 
+    pub config_reconciler: Arc,
 
     /// A mechanism for interacting with the hardware device tree
     pub hardware_manager: HardwareManager,
@@ -69,15 +62,24 @@ pub async fn spawn_all_longrunning_tasks(
     config: &Config,
 ) -> (
     LongRunningTaskHandles,
+    ConfigReconcilerSpawnToken,
     oneshot::Sender,
     oneshot::Sender,
 ) {
     let storage_key_requester = spawn_key_manager(log);
-    let mut storage_manager =
-        spawn_storage_manager(log, storage_key_requester.clone());
-    let storage_monitor_handle =
-        spawn_storage_monitor(log, storage_manager.clone());
+    let time_sync_config = if let Some(true) = config.skip_timesync {
+        TimeSyncConfig::Skip
+    } else {
+        TimeSyncConfig::Normal
+    };
+    let (mut config_reconciler, config_reconciler_spawn_token) =
+        ConfigReconcilerHandle::new(
+            MountConfig::default(),
+            storage_key_requester,
+            time_sync_config,
+            log,
+        );
 
     let nongimlet_observed_disks =
         config.nongimlet_observed_disks.clone().unwrap_or(vec![]);
@@ -85,38 +87,37 @@ pub async fn spawn_all_longrunning_tasks(
     let hardware_manager =
         spawn_hardware_manager(log, sled_mode, nongimlet_observed_disks).await;
 
-    // Start monitoring for hardware changes
+    // Start monitoring for hardware changes, adding some synthetic disks if
+    // necessary.
+    let raw_disks_tx = config_reconciler.raw_disks_tx();
+    upsert_synthetic_disks_if_needed(&log, &raw_disks_tx, &config).await;
     let (sled_agent_started_tx, service_manager_ready_tx) =
-        spawn_hardware_monitor(log, &hardware_manager, &storage_manager);
-
-    // Add some synthetic disks if necessary.
-    upsert_synthetic_disks_if_needed(&log, &storage_manager, &config).await;
+        spawn_hardware_monitor(log, &hardware_manager, raw_disks_tx);
 
     // Wait for the boot disk so that we can work with any ledgers,
     // such as those needed by the bootstore and sled-agent
     info!(log, "Waiting for boot disk");
-    let (disk_id, _) = storage_manager.wait_for_boot_disk().await;
+    let disk_id = config_reconciler.wait_for_boot_disk().await;
     info!(log, "Found boot disk {:?}", disk_id);
 
     let bootstore = spawn_bootstore_tasks(
         log,
-        &mut storage_manager,
+        &config_reconciler,
         &hardware_manager,
         global_zone_bootstrap_ip,
     )
     .await;
 
-    let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_manager);
+    let zone_bundler = spawn_zone_bundler_tasks(log, &config_reconciler);
 
     (
         LongRunningTaskHandles {
-            storage_key_requester,
-            storage_manager,
-            storage_monitor_handle,
+            config_reconciler: Arc::new(config_reconciler),
            hardware_manager,
            bootstore,
            zone_bundler,
         },
+        config_reconciler_spawn_token,
         sled_agent_started_tx,
         service_manager_ready_tx,
     )
@@ -131,32 +132,6 @@ fn spawn_key_manager(log: &Logger) -> StorageKeyRequester {
     storage_key_requester
 }
 
-fn spawn_storage_manager(
-    log: &Logger,
-    key_requester: StorageKeyRequester,
-) -> StorageHandle {
-    info!(log, "Starting StorageManager");
-    let (manager, handle) =
-        StorageManager::new(log, MountConfig::default(), key_requester);
-    tokio::spawn(async move {
-        manager.run().await;
-    });
-    handle
-}
-
-fn spawn_storage_monitor(
-    log: &Logger,
-    storage_handle: StorageHandle,
-) -> StorageMonitorHandle {
-    info!(log, "Starting StorageMonitor");
-    let (storage_monitor, handle) =
-        StorageMonitor::new(log, MountConfig::default(), storage_handle);
-    tokio::spawn(async move {
-        storage_monitor.run().await;
-    });
-    handle
-}
-
 async fn spawn_hardware_manager(
     log: &Logger,
     sled_mode: SledMode,
@@ -182,11 +157,11 @@ async fn spawn_hardware_manager(
 fn spawn_hardware_monitor(
     log: &Logger,
     hardware_manager: &HardwareManager,
-    storage_handle: &StorageHandle,
+    raw_disks_tx:
RawDisksSender, ) -> (oneshot::Sender, oneshot::Sender) { info!(log, "Starting HardwareMonitor"); let (mut monitor, sled_agent_started_tx, service_manager_ready_tx) = - HardwareMonitor::new(log, hardware_manager, storage_handle); + HardwareMonitor::new(log, hardware_manager, raw_disks_tx); tokio::spawn(async move { monitor.run().await; }); @@ -195,13 +170,16 @@ fn spawn_hardware_monitor( async fn spawn_bootstore_tasks( log: &Logger, - storage_handle: &mut StorageHandle, + config_reconciler: &ConfigReconcilerHandle, hardware_manager: &HardwareManager, global_zone_bootstrap_ip: Ipv6Addr, ) -> bootstore::NodeHandle { - let iter_all = storage_handle.get_latest_disks().await; let config = new_bootstore_config( - &iter_all, + &config_reconciler + .internal_disks_rx() + .current() + .all_cluster_datasets() + .collect::>(), hardware_manager.baseboard(), global_zone_bootstrap_ip, ) @@ -226,16 +204,21 @@ async fn spawn_bootstore_tasks( // `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely fn spawn_zone_bundler_tasks( log: &Logger, - storage_handle: &mut StorageHandle, + config_reconciler: &ConfigReconcilerHandle, ) -> ZoneBundler { info!(log, "Starting ZoneBundler related tasks"); let log = log.new(o!("component" => "ZoneBundler")); - ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) + ZoneBundler::new( + log, + config_reconciler.internal_disks_rx().clone(), + config_reconciler.available_datasets_rx(), + CleanupContext::default(), + ) } async fn upsert_synthetic_disks_if_needed( log: &Logger, - storage_manager: &StorageHandle, + raw_disks_tx: &RawDisksSender, config: &Config, ) { if let Some(vdevs) = &config.vdevs { @@ -248,7 +231,7 @@ async fn upsert_synthetic_disks_if_needed( let disk = RawSyntheticDisk::load(vdev, i.try_into().unwrap()) .expect("Failed to parse synthetic disk") .into(); - storage_manager.detected_raw_disk(disk).await.await.unwrap(); + raw_disks_tx.add_or_update_raw_disk(disk, log); } } } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 7096a15769b..95e09e39b50 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -10,8 +10,8 @@ use nexus_client::types::{ BackgroundTasksActivateRequest, ProbeExternalIp, ProbeInfo, }; use omicron_common::api::external::{ - Generation, VpcFirewallRuleAction, VpcFirewallRuleDirection, - VpcFirewallRulePriority, VpcFirewallRuleStatus, + VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, + VpcFirewallRuleStatus, }; use omicron_common::api::internal::shared::{ NetworkInterface, ResolvedVpcFirewallRule, @@ -19,9 +19,10 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid}; use rand::SeedableRng; use rand::prelude::IteratorRandom; -use sled_storage::dataset::ZONE_DATASET; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; +use sled_agent_config_reconciler::{ + AvailableDatasetsReceiver, CurrentlyManagedZpools, + CurrentlyManagedZpoolsReceiver, +}; use slog::{Logger, error, warn}; use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -50,7 +51,6 @@ pub(crate) struct ProbeManager { } struct RunningProbes { - storage_generation: Option, zones: HashMap, } @@ -60,10 +60,10 @@ pub(crate) struct ProbeManagerInner { log: Logger, sled_id: Uuid, vnic_allocator: VnicAllocator, - storage: StorageHandle, port_manager: PortManager, metrics_queue: MetricsRequestQueue, running_probes: Mutex, + available_datasets_rx: 
AvailableDatasetsReceiver, zones_api: Arc, } @@ -73,9 +73,9 @@ impl ProbeManager { sled_id: Uuid, nexus_client: NexusClient, etherstub: Etherstub, - storage: StorageHandle, port_manager: PortManager, metrics_queue: MetricsRequestQueue, + available_datasets_rx: AvailableDatasetsReceiver, log: Logger, ) -> Self { Self { @@ -87,69 +87,24 @@ impl ProbeManager { Arc::new(illumos_utils::dladm::Dladm::real_api()), ), running_probes: Mutex::new(RunningProbes { - storage_generation: None, zones: HashMap::new(), }), nexus_client, log, sled_id, - storage, port_manager, metrics_queue, + available_datasets_rx, zones_api: Arc::new(illumos_utils::zone::Zones::real_api()), }), } } - pub(crate) async fn run(&self) { - self.inner.run().await; - } - - /// Removes any probes using filesystem roots on zpools that are not - /// contained in the set of "disks". - pub(crate) async fn use_only_these_disks(&self, disks: &AllDisks) { - let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); - let mut probes = self.inner.running_probes.lock().await; - - // Consider the generation number on the incoming request to avoid - // applying old requests. - let requested_generation = *disks.generation(); - if let Some(last_gen) = probes.storage_generation { - if last_gen >= requested_generation { - // This request looks old, ignore it. - info!(self.inner.log, "use_only_these_disks: Ignoring request"; - "last_gen" => ?last_gen, "requested_gen" => ?requested_generation); - return; - } - } - probes.storage_generation = Some(requested_generation); - info!(self.inner.log, "use_only_these_disks: Processing new request"; - "gen" => ?requested_generation); - - let to_remove = probes - .zones - .iter() - .filter_map(|(id, probe)| { - let probe_pool = match probe.root_zpool() { - ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, - ZpoolOrRamdisk::Ramdisk => { - info!( - self.inner.log, - "use_only_these_disks: removing probe on ramdisk"; - "id" => ?id, - ); - return None; - } - }; - - if !u2_set.contains(probe_pool) { Some(*id) } else { None } - }) - .collect::>(); - - for probe_id in to_remove { - info!(self.inner.log, "use_only_these_disks: Removing probe"; "probe_id" => ?probe_id); - self.inner.remove_probe_locked(&mut probes, probe_id).await; - } + pub(crate) async fn run( + &self, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) { + self.inner.run(currently_managed_zpools_rx).await; } } @@ -214,18 +169,55 @@ impl TryFrom for ProbeState { impl ProbeManagerInner { /// Run the probe manager. If it's already running this is a no-op. - async fn run(self: &Arc) { + async fn run( + self: &Arc, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) { let mut join_handle = self.join_handle.lock().await; if join_handle.is_none() { - *join_handle = Some(self.clone().reconciler()) + *join_handle = + Some(self.clone().reconciler(currently_managed_zpools_rx)) } } /// Run the reconciler loop on a background thread. - fn reconciler(self: Arc) -> JoinHandle<()> { + fn reconciler( + self: Arc, + mut currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) -> JoinHandle<()> { tokio::spawn(async move { loop { - sleep(RECONCILIATION_INTERVAL).await; + let sleep_fut = sleep(RECONCILIATION_INTERVAL); + tokio::pin!(sleep_fut); + + // Wait until the next reconciliation tick, but handle any + // changes to the set of disks in the meantime. + loop { + tokio::select! 
{
+                        _ = &mut sleep_fut => break,
+
+                        // Cancel-safe per docs on `changed()`
+                        result = currently_managed_zpools_rx.changed() => {
+                            match result {
+                                Ok(()) => {
+                                    self.use_only_these_disks(
+                                        &currently_managed_zpools_rx
+                                            .current_and_update()
+                                    ).await;
+                                    continue;
+                                }
+                                Err(_) => {
+                                    warn!(
+                                        self.log,
+                                        "ProbeManager's 'current zpools' \
+                                        channel closed; shutting down",
+                                    );
+                                    return;
+                                }
+                            }
+                        }
+                    }
+                }
 
                 // Collect the target and current state. Use set operations
                 // to determine what probes need to be added, removed and/or
@@ -268,6 +260,37 @@ impl ProbeManagerInner {
         })
     }
 
+    /// Removes any probes using filesystem roots on zpools that are not
+    /// contained in the set of "disks".
+    async fn use_only_these_disks(&self, disks: &CurrentlyManagedZpools) {
+        let mut probes = self.running_probes.lock().await;
+
+        let to_remove = probes
+            .zones
+            .iter()
+            .filter_map(|(id, probe)| {
+                let probe_pool = match probe.root_zpool() {
+                    ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name,
+                    ZpoolOrRamdisk::Ramdisk => {
+                        info!(
+                            self.log,
+                            "use_only_these_disks: removing probe on ramdisk";
+                            "id" => ?id,
+                        );
+                        return None;
+                    }
+                };
+
+                if !disks.contains(probe_pool) { Some(*id) } else { None }
+            })
+            .collect::>();
+
+        for probe_id in to_remove {
+            info!(self.log, "use_only_these_disks: Removing probe"; "probe_id" => ?probe_id);
+            self.remove_probe_locked(&mut probes, probe_id).await;
+        }
+    }
+
     /// Add a set of probes to this sled.
     ///
     /// Returns the number of inserted probes.
@@ -291,12 +314,9 @@ impl ProbeManagerInner {
     /// boots the probe zone.
     async fn add_probe(self: &Arc, probe: &ProbeState) -> Result<()> {
         let mut rng = rand::rngs::StdRng::from_entropy();
-        let current_disks = self
-            .storage
-            .get_latest_disks()
-            .await
-            .all_u2_mountpoints(ZONE_DATASET);
-        let zone_root_path = current_disks
+        let zone_root_path = self
+            .available_datasets_rx
+            .all_mounted_zone_root_datasets()
             .into_iter()
             .choose(&mut rng)
             .ok_or_else(|| anyhow!("u2 not found"))?;
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index 0700e20ae1e..7d0f68b553d 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -117,6 +117,7 @@ use serde::{Deserialize, Serialize};
 use sled_agent_client::{
     Client as SledAgentClient, Error as SledAgentError, types as SledAgentTypes,
 };
+use sled_agent_config_reconciler::InternalDisksReceiver;
 use sled_agent_types::early_networking::{
     EarlyNetworkConfig, EarlyNetworkConfigBody,
 };
@@ -127,8 +128,6 @@ use sled_agent_types::rack_ops::RssStep;
 use sled_agent_types::sled::StartSledAgentRequest;
 use sled_agent_types::time_sync::TimeSync;
 use sled_hardware_types::underlay::BootstrapInterface;
-use sled_storage::dataset::CONFIG_DATASET;
-use sled_storage::manager::StorageHandle;
 use slog::Logger;
 use slog_error_chain::InlineErrorChain;
 use std::collections::{BTreeMap, BTreeSet, btree_map};
@@ -257,15 +256,14 @@ impl RackSetupService {
     /// Arguments:
     /// - `log`: The logger.
     /// - `config`: The config file, which is used to setup the rack.
- /// - `storage_manager`: A handle for interacting with the storage manager - /// task + /// - `internal_disks_rx`: Tells us about available internal disks /// - `local_bootstrap_agent`: Communication channel by which we can send /// commands to our local bootstrap-agent (e.g., to start sled-agents) /// - `bootstore` - A handle to call bootstore APIs pub(crate) fn new( log: Logger, config: Config, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, @@ -275,7 +273,7 @@ impl RackSetupService { if let Err(e) = svc .run( &config, - &storage_manager, + &internal_disks_rx, local_bootstrap_agent, bootstore, step_tx, @@ -375,86 +373,9 @@ impl ServiceInner { "datasets" => ?sled_config.datasets, "zones" => ?sled_config.zones, ); - let result = client.omicron_config_put(&sled_config).await; - let error = match result { - Ok(response) => { - let response = response.into_inner(); - - // An HTTP OK may contain _partial_ success: check whether - // we got any individual disk failures, and split those out - // into transient/permanent cases based on whether they - // indicate we should retry. - let disk_errors = - response.disks.into_iter().filter_map(|status| { - status.err.map(|err| (status.identity, err)) - }); - let mut transient_errors = Vec::new(); - let mut permanent_errors = Vec::new(); - for (identity, error) in disk_errors { - if error.retryable() { - transient_errors.push(format!( - "Retryable error initializing disk \ - {} / {} / {}: {}", - identity.vendor, - identity.model, - identity.serial, - InlineErrorChain::new(&error) - )); - } else { - permanent_errors.push(format!( - "Non-retryable error initializing disk \ - {} / {} / {}: {}", - identity.vendor, - identity.model, - identity.serial, - InlineErrorChain::new(&error) - )); - } - } - if !permanent_errors.is_empty() { - return Err(BackoffError::permanent( - SetupServiceError::DiskInitializationPermanent { - permanent_errors, - }, - )); - } - if !transient_errors.is_empty() { - return Err(BackoffError::transient( - SetupServiceError::DiskInitializationTransient { - transient_errors, - }, - )); - } - - // No individual disk errors reported; all disks were - // initialized. Check for any dataset errors; these are not - // retryable. - let dataset_errors = response - .datasets - .into_iter() - .filter_map(|status| { - status.err.map(|err| { - format!( - "Error initializing dataset {}: {err}", - status.dataset_name.full_name() - ) - }) - }) - .collect::>(); - if !dataset_errors.is_empty() { - return Err(BackoffError::permanent( - SetupServiceError::DatasetInitialization { - errors: dataset_errors, - }, - )); - } - - // No individual dataset errors reported. We don't get - // status for individual zones (any failure there results in - // an HTTP-level error), so everything is good. - return Ok(()); - } - Err(error) => error, + let Err(error) = client.omicron_config_put(&sled_config).await + else { + return Ok(()); }; if let sled_agent_client::Error::ErrorResponse(response) = &error { @@ -502,6 +423,16 @@ impl ServiceInner { Ok(()) } + // Wait until the config reconciler on the target sled has successfully + // reconciled the config at `generation`. 
+ async fn wait_for_config_reconciliation_on_sled( + &self, + _sled_address: SocketAddrV6, + _generation: Generation, + ) -> Result<(), SetupServiceError> { + unimplemented!("needs updated inventory") + } + // Ensure that the desired sled configuration for a particular zone version // is deployed. // @@ -530,20 +461,26 @@ impl ServiceInner { })? .clone(); + // We bump the zone generation as we step through phases of + // RSS; use that as the overall sled config generation. + let generation = zones_config.generation; let sled_config = OmicronSledConfig { - // We bump the zone generation as we step through phases of - // RSS; use that as the overall sled config generation. - generation: zones_config.generation, + generation, disks: config .disks .iter() .map(|c| c.clone().into()) .collect(), datasets: config.datasets.values().cloned().collect(), - zones: zones_config.zones.iter().cloned().collect(), + zones: zones_config.zones.into_iter().collect(), }; self.set_config_on_sled(*sled_address, sled_config).await?; + self.wait_for_config_reconciliation_on_sled( + *sled_address, + generation, + ) + .await?; Ok::<(), SetupServiceError>(()) }), @@ -1120,7 +1057,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_manager: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, @@ -1133,19 +1070,18 @@ impl ServiceInner { config.az_subnet(), )?; - let started_marker_paths: Vec = storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let config_dataset_paths = internal_disks_rx + .current() + .all_config_datasets() + .collect::>(); + + let started_marker_paths: Vec = config_dataset_paths + .iter() .map(|p| p.join(RSS_STARTED_FILENAME)) .collect(); - let completed_marker_paths: Vec = storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let completed_marker_paths: Vec = config_dataset_paths + .iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index f60f367d280..15a5c2f784d 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -12,6 +12,7 @@ use crate::nexus::make_nexus_client; use crate::services::ServiceManager; use internal_dns_resolver::Resolver; use omicron_uuid_kinds::SledUuid; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::net::SocketAddr; @@ -39,6 +40,7 @@ impl Server { log: Logger, request: StartSledAgentRequest, long_running_tasks_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, services: ServiceManager, ) -> Result { info!(log, "setting up sled agent server"); @@ -61,6 +63,7 @@ impl Server { request, services, long_running_tasks_handles, + config_reconciler_spawn_token, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index a1f93043b45..802ffe2c182 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,12 +20,11 @@ //! of what other services Nexus wants to have executing on the sled. //! //! To accomplish this, the following interfaces are exposed: -//! - [ServiceManager::ensure_all_omicron_zones_persistent] exposes an API to -//! request a set of Omicron zones that should persist beyond reboot. +//! 
- [ServiceManager::start_omicron_zone] exposes an API to start a new Omicron +//! zone. //! - [ServiceManager::activate_switch] exposes an API to specifically enable //! or disable (via [ServiceManager::deactivate_switch]) the switch zone. -use crate::artifact_store::ArtifactStore; use crate::bootstrap::BootstrapNetworking; use crate::bootstrap::early_networking::{ EarlyNetworkSetup, EarlyNetworkSetupError, @@ -35,7 +34,6 @@ use crate::ddm_reconciler::DdmReconciler; use crate::metrics::MetricsRequestQueue; use crate::params::{DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt}; use crate::profile::*; -use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; use clickhouse_admin_types::CLICKHOUSE_KEEPER_CONFIG_DIR; @@ -94,42 +92,32 @@ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; use omicron_common::disk::{DatasetKind, DatasetName}; -use omicron_common::ledger::{self, Ledger, Ledgerable}; +use omicron_common::ledger::Ledgerable; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::OmicronZoneUuid; use rand::prelude::SliceRandom; -use sled_agent_types::{ - sled::SWITCH_ZONE_BASEBOARD_FILE, time_sync::TimeSync, - zone_bundle::ZoneBundleCause, -}; +use sled_agent_config_reconciler::InternalDisksReceiver; +use sled_agent_types::sled::SWITCH_ZONE_BASEBOARD_FILE; use sled_hardware::SledMode; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; -use sled_storage::dataset::{ - CONFIG_DATASET, INSTALL_DATASET, M2_ARTIFACT_DATASET, ZONE_DATASET, -}; -use sled_storage::manager::StorageHandle; +use sled_storage::dataset::{INSTALL_DATASET, ZONE_DATASET}; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::collections::BTreeMap; -use std::collections::HashSet; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; -use std::str::FromStr; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; -use tokio::sync::{MutexGuard, oneshot}; +use tokio::sync::oneshot; use tokio::task::JoinHandle; use tufaceous_artifact::ArtifactHash; use uuid::Uuid; use illumos_utils::zone::Zones; -const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED); - // These are all the same binary. They just reside at different paths. const CLICKHOUSE_SERVER_BINARY: &str = "/opt/oxide/clickhouse_server/clickhouse"; @@ -158,9 +146,6 @@ pub enum Error { #[error("Failed to find device {device}")] MissingDevice { device: String }, - #[error("Failed to access ledger: {0}")] - Ledger(#[from] ledger::Error), - #[error("Sled Agent not initialized yet")] SledAgentNotReady, @@ -388,14 +373,6 @@ impl From for omicron_common::api::external::Error { } } -/// Result of [ServiceManager::load_services] -pub enum LoadServicesResult { - /// We didn't load anything, there wasn't anything to load - NoServicesToLoad, - /// We successfully loaded the zones from our ledger. - ServicesLoaded, -} - fn display_zone_init_errors(errors: &[(String, Box)]) -> String { if errors.len() == 1 { return format!( @@ -514,9 +491,6 @@ impl RealSystemApi { impl SystemApi for RealSystemApi {} -// The filename of the ledger, within the provided directory. -const ZONES_LEDGER_FILENAME: &str = "omicron-zones.json"; - /// Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus /// wants for all of its zones) with the locally-determined configuration for /// these zones. 
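For context on the zone-start path this diff reworks: the removed `start_omicron_zones` helper further down started zones concurrently and reported per-zone successes and failures rather than failing fast. A rough, self-contained sketch of that shape, where `ZoneSpec`, `StartedZone`, and `start_one` are illustrative stand-ins and not the sled-agent's real types:

use futures::future::join_all;

struct ZoneSpec {
    name: String,
}
struct StartedZone {
    name: String,
}
#[derive(Debug)]
struct StartError(String);

async fn start_one(spec: &ZoneSpec) -> Result<StartedZone, StartError> {
    // The real code boots a zone here; the stand-in just succeeds.
    Ok(StartedZone { name: spec.name.clone() })
}

/// Start every zone concurrently and report per-zone outcomes, so one
/// bad zone does not keep its siblings from coming up.
async fn start_zones(
    specs: &[ZoneSpec],
) -> (Vec<StartedZone>, Vec<(String, StartError)>) {
    let futures = specs.iter().map(|spec| async move {
        start_one(spec).await.map_err(|err| (spec.name.clone(), err))
    });
    let mut started = Vec::new();
    let mut errors = Vec::new();
    for result in join_all(futures).await {
        match result {
            Ok(zone) => started.push(zone),
            Err((name, err)) => errors.push((name, err)),
        }
    }
    (started, errors)
}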
@@ -730,57 +704,22 @@ enum SwitchZoneState { }, } -// The return type for `start_omicron_zones`. -// -// When multiple zones are started concurrently, some can fail while others -// succeed. This structure allows the function to return this nuanced -// information. -#[must_use] -struct StartZonesResult { - // The set of zones which have successfully started. - new_zones: Vec, - - // The set of (zone name, error) of zones that failed to start. - errors: Vec<(String, Error)>, -} - -// A running zone and the configuration which started it. -#[derive(Debug)] -struct OmicronZone { - runtime: RunningZone, - config: OmicronZoneConfigLocal, -} - -impl OmicronZone { - fn name(&self) -> &str { - self.runtime.name() - } -} - -type ZoneMap = BTreeMap; - /// Manages miscellaneous Sled-local services. pub struct ServiceManagerInner { log: Logger, global_zone_bootstrap_link_local_address: Ipv6Addr, switch_zone: Mutex, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, time_synced: AtomicBool, switch_zone_maghemite_links: Vec, sidecar_revision: SidecarRevision, - // Zones representing running services - zones: Mutex, underlay_vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, bootstrap_vnic_allocator: VnicAllocator, ddm_reconciler: DdmReconciler, sled_info: OnceLock, switch_zone_bootstrap_address: Ipv6Addr, - storage: StorageHandle, - zone_bundler: ZoneBundler, - ledger_directory_override: OnceLock, - image_directory_override: OnceLock, + internal_disks_rx: InternalDisksReceiver, system_api: Box, } @@ -796,16 +735,6 @@ struct SledAgentInfo { metrics_queue: MetricsRequestQueue, } -pub(crate) enum TimeSyncConfig { - // Waits for NTP to confirm that time has been synchronized. - Normal, - // Skips timesync unconditionally. - Skip, - // Fails timesync unconditionally. - #[cfg(all(test, target_os = "illumos"))] - Fail, -} - #[derive(Clone)] pub struct ServiceManager { inner: Arc, @@ -910,37 +839,31 @@ impl ServiceManager { /// /// Args: /// - `log`: The logger - /// - `ddm_client`: Client pointed to our localhost ddmd + /// - `ddm_reconciler`: Handle for configuring our localhost ddmd /// - `bootstrap_networking`: Collection of etherstubs/VNICs set up when /// bootstrap agent begins /// - `sled_mode`: The sled's mode of operation (Gimlet vs Scrimlet). - /// - `time_sync_config`: Describes how the sled awaits synced time. /// - `sidecar_revision`: Rev of attached sidecar, if present. /// - `switch_zone_maghemite_links`: List of physical links on which /// maghemite should listen. - /// - `storage`: Shared handle to get the current state of disks/zpools. 
- #[allow(clippy::too_many_arguments)] + /// - `internal_disks_rx`: watch channel for changes to internal disks pub(crate) fn new( log: &Logger, ddm_reconciler: DdmReconciler, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageHandle, - zone_bundler: ZoneBundler, + internal_disks_rx: InternalDisksReceiver, ) -> Self { Self::new_inner( log, ddm_reconciler, bootstrap_networking, sled_mode, - time_sync_config, sidecar_revision, switch_zone_maghemite_links, - storage, - zone_bundler, + internal_disks_rx, RealSystemApi::new(), ) } @@ -951,11 +874,9 @@ impl ServiceManager { ddm_reconciler: DdmReconciler, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageHandle, - zone_bundler: ZoneBundler, + internal_disks_rx: InternalDisksReceiver, system_api: Box, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); @@ -969,11 +890,9 @@ impl ServiceManager { // Load the switch zone if it already exists? switch_zone: Mutex::new(SwitchZoneState::Disabled), sled_mode, - time_sync_config, time_synced: AtomicBool::new(false), sidecar_revision, switch_zone_maghemite_links, - zones: Mutex::new(BTreeMap::new()), underlay_vnic_allocator: VnicAllocator::new( "Service", bootstrap_networking.underlay_etherstub, @@ -989,25 +908,12 @@ impl ServiceManager { sled_info: OnceLock::new(), switch_zone_bootstrap_address: bootstrap_networking .switch_zone_bootstrap_ip, - storage, - zone_bundler, - ledger_directory_override: OnceLock::new(), - image_directory_override: OnceLock::new(), + internal_disks_rx, system_api, }), } } - #[cfg(all(test, target_os = "illumos"))] - fn override_ledger_directory(&self, path: Utf8PathBuf) { - self.inner.ledger_directory_override.set(path).unwrap(); - } - - #[cfg(all(test, target_os = "illumos"))] - fn override_image_directory(&self, path: Utf8PathBuf) { - self.inner.image_directory_override.set(path).unwrap(); - } - pub(crate) fn ddm_reconciler(&self) -> &DdmReconciler { &self.inner.ddm_reconciler } @@ -1016,126 +922,6 @@ impl ServiceManager { self.inner.switch_zone_bootstrap_address } - // TODO: This function refers to an old, deprecated format for storing - // service information. It is not deprecated for cleanup purposes, but - // should otherwise not be called in new code. - async fn all_service_ledgers(&self) -> Vec { - pub const SERVICES_LEDGER_FILENAME: &str = "services.json"; - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(SERVICES_LEDGER_FILENAME)]; - } - let resources = self.inner.storage.get_latest_disks().await; - resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() - .map(|p| p.join(SERVICES_LEDGER_FILENAME)) - .collect() - } - - async fn all_omicron_zone_ledgers(&self) -> Vec { - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(ZONES_LEDGER_FILENAME)]; - } - let resources = self.inner.storage.get_latest_disks().await; - resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() - .map(|p| p.join(ZONES_LEDGER_FILENAME)) - .collect() - } - - // Loads persistent configuration about any Omicron-managed zones that we're - // supposed to be running. - async fn load_ledgered_zones( - &self, - // This argument attempts to ensure that the caller holds the right - // lock. 
- _map: &MutexGuard<'_, ZoneMap>, - ) -> Result>, Error> { - let log = &self.inner.log; - - // NOTE: This is a function where we used to access zones by "service - // ledgers". This format has since been deprecated, and these files, - // if they exist, should not be used. - // - // We try to clean them up at this spot. Deleting this "removal" code - // in the future should be a safe operation; this is a non-load-bearing - // cleanup. - for path in self.all_service_ledgers().await { - match tokio::fs::remove_file(&path).await { - Ok(_) => (), - Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => (), - Err(e) => { - warn!( - log, - "Failed to delete old service ledger"; - "err" => ?e, - "path" => ?path, - ); - } - } - } - - // Try to load the current software's zone ledger - let ledger_paths = self.all_omicron_zone_ledgers().await; - info!(log, "Loading Omicron zones from: {ledger_paths:?}"); - let maybe_ledger = - Ledger::::new(log, ledger_paths.clone()) - .await; - - let Some(ledger) = maybe_ledger else { - info!(log, "Loading Omicron zones - No zones detected"); - return Ok(None); - }; - - info!( - log, - "Loaded Omicron zones"; - "zones_config" => ?ledger.data() - ); - Ok(Some(ledger)) - } - - // TODO(https://github.com/oxidecomputer/omicron/issues/2973): - // - // The sled agent retries this function indefinitely at the call-site, but - // we could be smarter. - // - // - If we know that disks are missing, we could wait for them - // - We could permanently fail if we are able to distinguish other errors - // more clearly. - pub async fn load_services(&self) -> Result { - let log = &self.inner.log; - let mut existing_zones = self.inner.zones.lock().await; - let Some(mut ledger) = - self.load_ledgered_zones(&existing_zones).await? - else { - // Nothing found -- nothing to do. - info!( - log, - "Loading Omicron zones - \ - no zones nor old-format services found" - ); - return Ok(LoadServicesResult::NoServicesToLoad); - }; - - let zones_config = ledger.data_mut(); - info!( - log, - "Loaded Omicron zones"; - "zones_config" => ?zones_config - ); - let omicron_zones_config = - zones_config.clone().to_omicron_zones_config(); - - self.ensure_all_omicron_zones( - &mut existing_zones, - omicron_zones_config, - ) - .await?; - Ok(LoadServicesResult::ServicesLoaded) - } - /// Sets up "Sled Agent" information, including underlay info. /// /// Any subsequent calls after the first invocation return an error. @@ -1729,22 +1515,18 @@ impl ServiceManager { OmicronZoneImageSource::InstallDataset => None, OmicronZoneImageSource::Artifact { hash } => Some(hash.to_string()), }; - let all_disks = self.inner.storage.get_latest_disks().await; + let internal_disks = self.inner.internal_disks_rx.current(); let zone_image_paths = match image_source { OmicronZoneImageSource::InstallDataset => { // Look for the image in the ramdisk first let mut zone_image_paths = vec![Utf8PathBuf::from("/opt/oxide")]; - // Inject an image path if requested by a test. - if let Some(path) = self.inner.image_directory_override.get() { - zone_image_paths.push(path.clone()); - }; // If the boot disk exists, look for the image in the "install" // dataset there too. - if let Some((_, boot_zpool)) = all_disks.boot_disk() { + if let Some(boot_zpool) = internal_disks.boot_disk_zpool() { zone_image_paths.push(boot_zpool.dataset_mountpoint( - &all_disks.mount_config().root, + &internal_disks.mount_config().root, INSTALL_DATASET, )); } @@ -1752,25 +1534,7 @@ impl ServiceManager { zone_image_paths } OmicronZoneImageSource::Artifact { .. 
} => { - // Search both artifact datasets, but look on the boot disk first. - let boot_zpool = - all_disks.boot_disk().map(|(_, boot_zpool)| boot_zpool); - // This iterator starts with the zpool for the boot disk (if it - // exists), and then is followed by all other zpools. - let zpool_iter = boot_zpool.clone().into_iter().chain( - all_disks - .all_m2_zpools() - .into_iter() - .filter(|zpool| Some(zpool) != boot_zpool.as_ref()), - ); - zpool_iter - .map(|zpool| { - zpool.dataset_mountpoint( - &all_disks.mount_config().root, - M2_ARTIFACT_DATASET, - ) - }) - .collect() + internal_disks.all_artifact_datasets().collect() } }; @@ -3520,13 +3284,13 @@ impl ServiceManager { // - `all_u2_pools` provides a snapshot into durable storage on this sled, // which gives the storage manager an opportunity to validate the zone's // storage configuration against the reality of the current sled. - async fn start_omicron_zone( + pub(crate) async fn start_omicron_zone( &self, mount_config: &MountConfig, zone: &OmicronZoneConfig, time_is_synchronized: bool, - all_u2_pools: &Vec, - ) -> Result { + all_u2_pools: &[ZpoolName], + ) -> Result { // Ensure the zone has been fully removed before we try to boot it. // // This ensures that old "partially booted/stopped" zones do not @@ -3563,343 +3327,7 @@ impl ServiceManager { ) .await?; - Ok(OmicronZone { runtime, config }) - } - - // Concurrently attempts to start all zones identified by requests. - // - // This method is NOT idempotent. - // - // If we try to start ANY zones concurrently, the result is contained - // in the `StartZonesResult` value. This will contain the set of zones which - // were initialized successfully, as well as the set of zones which failed - // to start. - async fn start_omicron_zones( - &self, - mount_config: &MountConfig, - requests: impl Iterator + Clone, - time_is_synchronized: bool, - all_u2_pools: &Vec, - ) -> Result { - if let Some(name) = - requests.clone().map(|zone| zone.zone_name()).duplicates().next() - { - return Err(Error::BadServiceRequest { - service: name, - message: "Should not initialize zone twice".to_string(), - }); - } - - let futures = requests.map(|zone| async move { - self.start_omicron_zone( - mount_config, - &zone, - time_is_synchronized, - all_u2_pools, - ) - .await - .map_err(|err| (zone.zone_name(), err)) - }); - - let results = futures::future::join_all(futures).await; - - let mut new_zones = Vec::new(); - let mut errors = Vec::new(); - for result in results { - match result { - Ok(zone) => { - info!(self.inner.log, "Zone started"; "zone" => zone.name()); - new_zones.push(zone); - } - Err((name, error)) => { - warn!(self.inner.log, "Zone failed to start"; "zone" => &name); - errors.push((name, error)) - } - } - } - Ok(StartZonesResult { new_zones, errors }) - } - - /// Returns the current Omicron zone configuration - pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { - let log = &self.inner.log; - - // We need to take the lock in order for the information in the ledger - // to be up-to-date. - let _existing_zones = self.inner.zones.lock().await; - - // Read the existing set of services from the ledger. 
- let zone_ledger_paths = self.all_omicron_zone_ledgers().await; - let ledger_data = match Ledger::::new( - log, - zone_ledger_paths.clone(), - ) - .await - { - Some(ledger) => ledger.data().clone(), - None => OmicronZonesConfigLocal::initial(), - }; - - ledger_data.to_omicron_zones_config() - } - - /// Ensures that particular Omicron zones are running - /// - /// These services will be instantiated by this function, and will be - /// recorded to a local file to ensure they start automatically on next - /// boot. - pub async fn ensure_all_omicron_zones_persistent( - &self, - mut request: OmicronZonesConfig, - ) -> Result<(), Error> { - let log = &self.inner.log; - - let mut existing_zones = self.inner.zones.lock().await; - - // Ensure that any zone images from the artifact store are present. - for zone in &request.zones { - if let Some(hash) = zone.image_source.artifact_hash() { - if let Err(err) = ArtifactStore::get_from_storage( - &self.inner.storage, - &self.inner.log, - hash, - ) - .await - { - return Err(Error::ZoneArtifactNotFound { - hash, - zone_kind: zone.zone_type.kind().report_str(), - id: zone.id, - err, - }); - } - } - } - - // Read the existing set of services from the ledger. - let zone_ledger_paths = self.all_omicron_zone_ledgers().await; - let mut ledger = match Ledger::::new( - log, - zone_ledger_paths.clone(), - ) - .await - { - Some(ledger) => ledger, - None => Ledger::::new_with( - log, - zone_ledger_paths.clone(), - OmicronZonesConfigLocal::initial(), - ), - }; - - let ledger_zone_config = ledger.data_mut(); - debug!(log, "ensure_all_omicron_zones_persistent"; - "request_generation" => request.generation.to_string(), - "ledger_generation" => - ledger_zone_config.omicron_generation.to_string(), - ); - - // Absolutely refuse to downgrade the configuration. - if ledger_zone_config.omicron_generation > request.generation { - return Err(Error::RequestedZoneConfigOutdated { - requested: request.generation, - current: ledger_zone_config.omicron_generation, - }); - } - - // If the generation is the same as what we're running, but the contents - // aren't, that's a problem, too. - if ledger_zone_config.omicron_generation == request.generation { - // Nexus should send us consistent zone orderings; however, we may - // reorder the zone list inside `ensure_all_omicron_zones`. To avoid - // equality checks failing only because the two lists are ordered - // differently, sort them both here before comparing. - let mut ledger_zones = - ledger_zone_config.clone().to_omicron_zones_config().zones; - - // We sort by ID because we assume no two zones have the same ID. If - // that assumption is wrong, we may return an error here where the - // conflict is soley the list orders, but in such a case that's the - // least of our problems. - ledger_zones.sort_by_key(|z| z.id); - request.zones.sort_by_key(|z| z.id); - - if ledger_zones != request.zones { - return Err(Error::RequestedConfigConflicts( - request.generation, - )); - } - } - - let omicron_generation = request.generation; - let ledger_generation = ledger_zone_config.ledger_generation; - self.ensure_all_omicron_zones(&mut existing_zones, request).await?; - let zones = existing_zones - .values() - .map(|omicron_zone| omicron_zone.config.clone()) - .collect(); - - let new_config = OmicronZonesConfigLocal { - omicron_generation, - ledger_generation, - zones, - }; - - // If the contents of the ledger would be identical, we can avoid - // performing an update and commit. 
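The ledger handling being deleted here follows a small, reusable shape: refuse generation downgrades, treat a same-generation request with different contents as a conflict, and skip the write entirely when nothing changed. A compact sketch of that logic, with `ZonesLedger` as an illustrative stand-in rather than the real `Ledger`/`OmicronZonesConfigLocal` types:

#[derive(Clone, PartialEq)]
struct ZonesLedger {
    generation: u64,
    zones: Vec<String>, // stand-in for the real zone configs
}

#[derive(Debug)]
enum LedgerError {
    Outdated { requested: u64, current: u64 },
    Conflict { generation: u64 },
}

/// Returns Ok(true) when the caller should persist `requested`, and
/// Ok(false) when the ledger already matches it exactly.
fn apply_new_config(
    ledgered: &mut ZonesLedger,
    requested: ZonesLedger,
) -> Result<bool, LedgerError> {
    // Absolutely refuse to downgrade the configuration.
    if ledgered.generation > requested.generation {
        return Err(LedgerError::Outdated {
            requested: requested.generation,
            current: ledgered.generation,
        });
    }
    // The same generation must describe the same contents.
    if ledgered.generation == requested.generation && *ledgered != requested {
        return Err(LedgerError::Conflict { generation: requested.generation });
    }
    // Identical contents: avoid a pointless rewrite and commit.
    if *ledgered == requested {
        return Ok(false);
    }
    *ledgered = requested;
    Ok(true)
}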
- if *ledger_zone_config == new_config { - return Ok(()); - } - - // Update the zones in the ledger and write it back to both M.2s - *ledger_zone_config = new_config; - ledger.commit().await?; - - Ok(()) - } - - // Ensures that only the following Omicron zones are running. - // - // This method strives to be idempotent. - // - // - Starting and stopping zones is not an atomic operation - it's possible - // that we cannot start a zone after a previous one has been successfully - // created (or destroyed) intentionally. As a result, even in error cases, - // it's possible that the set of `existing_zones` changes. However, this set - // will only change in the direction of `new_request`: zones will only be - // removed if they ARE NOT part of `new_request`, and zones will only be - // added if they ARE part of `new_request`. - // - Zones are generally not updated in-place (i.e., two zone configurations - // that differ in any way are treated as entirely distinct), with an - // exception for backfilling the `filesystem_pool`, as long as the new - // request's filesystem pool matches the actual pool for that zones. This - // in-place update is allowed because changing only that property to match - // the runtime system does not require reconfiguring the zone or shutting it - // down and restarting it. - // - This method does not record any information such that these services - // are re-instantiated on boot. - async fn ensure_all_omicron_zones( - &self, - // The MutexGuard here attempts to ensure that the caller has the right - // lock held when calling this function. - existing_zones: &mut MutexGuard<'_, ZoneMap>, - new_request: OmicronZonesConfig, - ) -> Result<(), Error> { - // Do some data-normalization to ensure we can compare the "requested - // set" vs the "existing set" as HashSets. - let ReconciledNewZonesRequest { - zones_to_be_removed, - zones_to_be_added, - } = reconcile_running_zones_with_new_request( - existing_zones, - new_request, - &self.inner.log, - )?; - - // Destroy zones that should not be running - for zone in zones_to_be_removed { - self.zone_bundle_and_try_remove(existing_zones, &zone).await; - } - - // Collect information that's necessary to start new zones - let storage = self.inner.storage.get_latest_disks().await; - let mount_config = storage.mount_config(); - let all_u2_pools = storage.all_u2_zpools(); - let time_is_synchronized = - match self.timesync_get_locked(&existing_zones).await { - // Time is synchronized - Ok(TimeSync { sync: true, .. }) => true, - // Time is not synchronized, or we can't check - _ => false, - }; - - // Concurrently boot all new zones - let StartZonesResult { new_zones, errors } = self - .start_omicron_zones( - mount_config, - zones_to_be_added.iter(), - time_is_synchronized, - &all_u2_pools, - ) - .await?; - - // Add the new zones to our tracked zone set - existing_zones.extend( - new_zones.into_iter().map(|zone| (zone.name().to_string(), zone)), - ); - - // If any zones failed to start, exit with an error - if !errors.is_empty() { - return Err(Error::ZoneEnsure { errors }); - } - Ok(()) - } - - // Attempts to take a zone bundle and remove a zone. - // - // Logs, but does not return an error on failure. 
- async fn zone_bundle_and_try_remove( - &self, - existing_zones: &mut MutexGuard<'_, ZoneMap>, - zone: &OmicronZoneConfig, - ) { - let log = &self.inner.log; - let expected_zone_name = zone.zone_name(); - let Some(mut zone) = existing_zones.remove(&expected_zone_name) else { - warn!( - log, - "Expected to remove zone, but could not find it"; - "zone_name" => &expected_zone_name, - ); - return; - }; - // Ensure that the sled agent's metrics task is not tracking the zone's - // VNICs or OPTE ports. - if let Some(queue) = self.maybe_metrics_queue() { - match queue.untrack_zone_links(&zone.runtime) { - Ok(_) => debug!( - log, - "stopped tracking zone datalinks"; - "zone_name" => &expected_zone_name, - ), - Err(errors) => error!( - log, - "failed to stop tracking zone datalinks"; - "errors" => ?errors, - "zone_name" => &expected_zone_name - ), - } - } - debug!( - log, - "removing an existing zone"; - "zone_name" => &expected_zone_name, - ); - if let Err(e) = self - .inner - .zone_bundler - .create(&zone.runtime, ZoneBundleCause::UnexpectedZone) - .await - { - error!( - log, - "Failed to take bundle of unexpected zone"; - "zone_name" => &expected_zone_name, - InlineErrorChain::new(&e), - ); - } - if let Err(e) = zone.runtime.stop().await { - error!(log, "Failed to stop zone {}: {e}", zone.name()); - } - if let Err(e) = - self.clean_up_after_zone_shutdown(&zone.config.zone).await - { - error!( - log, - "Failed to clean up after stopping zone {}", zone.name(); - InlineErrorChain::new(&e), - ); - } + Ok(runtime) } // Ensures that if a zone is about to be installed, it does not exist. @@ -3997,7 +3425,7 @@ impl ServiceManager { &self, mount_config: &MountConfig, zone: &OmicronZoneConfig, - all_u2_pools: &Vec, + all_u2_pools: &[ZpoolName], ) -> Result { let name = zone.zone_name(); @@ -4096,110 +3524,12 @@ impl ServiceManager { } } - pub async fn timesync_get(&self) -> Result { - let existing_zones = self.inner.zones.lock().await; - self.timesync_get_locked(&existing_zones).await - } - - async fn timesync_get_locked( - &self, - existing_zones: &tokio::sync::MutexGuard<'_, ZoneMap>, - ) -> Result { - let skip_timesync = match &self.inner.time_sync_config { - TimeSyncConfig::Normal => false, - TimeSyncConfig::Skip => true, - #[cfg(all(test, target_os = "illumos"))] - TimeSyncConfig::Fail => { - info!(self.inner.log, "Configured to fail timesync checks"); - return Err(Error::TimeNotSynchronized); - } - }; - - if skip_timesync { - info!(self.inner.log, "Configured to skip timesync checks"); - self.on_time_sync().await; - return Ok(TimeSync { - sync: true, - ref_id: 0, - ip_addr: IPV6_UNSPECIFIED, - stratum: 0, - ref_time: 0.0, - correction: 0.00, - }); - }; - - let ntp_zone_name = - InstalledZone::get_zone_name(ZoneKind::NTP_PREFIX, None); - - let ntp_zone = existing_zones - .iter() - .find(|(name, _)| name.starts_with(&ntp_zone_name)) - .ok_or_else(|| Error::NtpZoneNotReady)? - .1; - - // XXXNTP - This could be replaced with a direct connection to the - // daemon using a patched version of the chrony_candm crate to allow - // a custom server socket path. 
From the GZ, it should be possible to - // connect to the UNIX socket at - // format!("{}/var/run/chrony/chronyd.sock", ntp_zone.root()) - - match ntp_zone.runtime.run_cmd(&["/usr/bin/chronyc", "-c", "tracking"]) - { - Ok(stdout) => { - let v: Vec<&str> = stdout.split(',').collect(); - - if v.len() > 9 { - let ref_id = u32::from_str_radix(v[0], 16) - .map_err(|_| Error::NtpZoneNotReady)?; - let ip_addr = - IpAddr::from_str(v[1]).unwrap_or(IPV6_UNSPECIFIED); - let stratum = u8::from_str(v[2]) - .map_err(|_| Error::NtpZoneNotReady)?; - let ref_time = f64::from_str(v[3]) - .map_err(|_| Error::NtpZoneNotReady)?; - let correction = f64::from_str(v[4]) - .map_err(|_| Error::NtpZoneNotReady)?; - - // Per `chronyc waitsync`'s implementation, if either the - // reference IP address is not unspecified or the reference - // ID is not 0 or 0x7f7f0101, we are synchronized to a peer. - let peer_sync = !ip_addr.is_unspecified() - || (ref_id != 0 && ref_id != 0x7f7f0101); - - let sync = stratum < 10 - && ref_time > 1234567890.0 - && peer_sync - && correction.abs() <= 0.05; - - if sync { - self.on_time_sync().await; - } - - Ok(TimeSync { - sync, - ref_id, - ip_addr, - stratum, - ref_time, - correction, - }) - } else { - Err(Error::NtpZoneNotReady) - } - } - Err(e) => { - error!(self.inner.log, "chronyc command failed: {}", e); - Err(Error::NtpZoneNotReady) - } - } - } - /// Check if the synchronization state of the sled has shifted to true and /// if so, execute the any out-of-band actions that need to be taken. /// /// This function only executes the out-of-band actions once, once the /// synchronization state has shifted to true. - async fn on_time_sync(&self) { + pub(crate) async fn on_time_sync(&self) { if self .inner .time_synced @@ -5014,958 +4344,9 @@ fn internal_dns_addrobj_name(gz_address_index: u32) -> String { format!("internaldns{gz_address_index}") } -#[derive(Debug)] -struct ReconciledNewZonesRequest { - zones_to_be_removed: HashSet, - zones_to_be_added: HashSet, -} - -fn reconcile_running_zones_with_new_request( - existing_zones: &mut MutexGuard<'_, ZoneMap>, - new_request: OmicronZonesConfig, - log: &Logger, -) -> Result { - reconcile_running_zones_with_new_request_impl( - existing_zones - .values_mut() - .map(|z| (&mut z.config.zone, z.runtime.root_zpool())), - new_request, - log, - ) -} - -// Separate helper function for `reconcile_running_zones_with_new_request` that -// allows unit tests to exercise the implementation without having to construct -// a `&mut MutexGuard<'_, ZoneMap>` for `existing_zones`. -fn reconcile_running_zones_with_new_request_impl<'a>( - existing_zones_with_runtime_zpool: impl Iterator< - Item = (&'a mut OmicronZoneConfig, &'a ZpoolOrRamdisk), - >, - new_request: OmicronZonesConfig, - log: &Logger, -) -> Result { - let mut existing_zones_by_id: BTreeMap<_, _> = - existing_zones_with_runtime_zpool - .map(|(zone, zpool)| (zone.id, (zone, zpool))) - .collect(); - let mut zones_to_be_added = HashSet::new(); - let mut zones_to_be_removed = HashSet::new(); - let mut zones_to_update = Vec::new(); - - for zone in new_request.zones.into_iter() { - let Some((existing_zone, runtime_zpool)) = - existing_zones_by_id.remove(&zone.id) - else { - // This zone isn't in the existing set; add it. - zones_to_be_added.insert(zone); - continue; - }; - - // We're already running this zone. If the config hasn't changed, we - // have nothing to do. - if zone == *existing_zone { - continue; - } - - // Special case for fixing #7229. 
We have an incoming request for a zone - // that we're already running except the config has changed; normally, - // we'd shut the zone down and restart it. However, if we get a new - // request that is: - // - // 1. setting `filesystem_pool`, and - // 2. the config for this zone is otherwise identical, and - // 3. the new `filesystem_pool` matches the pool on which the zone is - // installed - // - // then we don't want to shut the zone down and restart it, because the - // config hasn't actually changed in any meaningful way; this is just - // reconfigurator correcting #7229. - if let Some(new_filesystem_pool) = &zone.filesystem_pool { - let differs_only_by_filesystem_pool = { - // Clone `existing_zone` and mutate its `filesystem_pool` to - // match the new request; if they now match, that's the only - // field that's different. - let mut existing = existing_zone.clone(); - existing.filesystem_pool = Some(new_filesystem_pool.clone()); - existing == zone - }; - - let runtime_zpool = match runtime_zpool { - ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, - ZpoolOrRamdisk::Ramdisk => { - // The only zone we run on the ramdisk is the switch - // zone, for which it isn't possible to get a zone - // request, so it should be fine to put an - // `unreachable!()` here. Out of caution for future - // changes, we'll instead return an error that the - // requested zone is on the ramdisk. - error!( - log, - "fix-7229: unexpectedly received request with a \ - zone config for a zone running on ramdisk"; - "new_config" => ?zone, - "existing_config" => ?existing_zone, - ); - return Err(Error::ZoneIsRunningOnRamdisk { - zone_id: zone.id, - }); - } - }; - - if differs_only_by_filesystem_pool { - if new_filesystem_pool == runtime_zpool { - // Our #7229 special case: the new config is only filling in - // the pool, and it does so correctly. Move on to the next - // zone in the request without adding this zone to either of - // our `zone_to_be_*` sets. - info!( - log, - "fix-7229: accepted new zone config that changes only \ - filesystem_pool"; - "new_config" => ?zone, - ); - - // We should update this `existing_zone`, but delay doing so - // until we've processed all zones (so if there are any - // failures later, we don't return having partially-updated - // the existing zones). - zones_to_update.push((existing_zone, zone)); - continue; - } else { - error!( - log, - "fix-7229: rejected new zone config that changes only \ - filesystem_pool (incorrect pool)"; - "new_config" => ?zone, - "expected_pool" => %runtime_zpool, - ); - return Err(Error::InvalidFilesystemPoolZoneConfig { - zone_id: zone.id, - expected_pool: runtime_zpool.clone(), - got_pool: new_filesystem_pool.clone(), - }); - } - } - } - - // End of #7229 special case: this zone is already running, but the new - // request has changed it in some way. We need to shut it down and - // restart it. - zones_to_be_removed.insert(existing_zone.clone()); - zones_to_be_added.insert(zone); - } - - // Any remaining entries in `existing_zones_by_id` should be shut down. - zones_to_be_removed - .extend(existing_zones_by_id.into_values().map(|(z, _)| z.clone())); - - // All zones have been handled successfully; commit any changes to existing - // zones we found in our "fix 7229" special case above. 
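The fix-7229 handling removed above hinges on one comparison trick: clone the running zone's config, overwrite only `filesystem_pool` with the incoming value, and check for equality to prove that nothing else changed. A small sketch of that check in isolation, with `ZoneConfig` as an illustrative stand-in:

#[derive(Clone, PartialEq)]
struct ZoneConfig {
    id: u32,
    filesystem_pool: Option<String>,
    address: String,
}

/// Returns true when `incoming` differs from `existing` only in that it
/// fills in `filesystem_pool`, and it fills it in with the pool the zone
/// is actually running on. In that case the zone can be left alone
/// instead of being torn down and restarted.
fn accept_backfilled_filesystem_pool(
    existing: &ZoneConfig,
    incoming: &ZoneConfig,
    runtime_pool: &str,
) -> bool {
    let Some(new_pool) = &incoming.filesystem_pool else {
        return false;
    };
    // Clone the existing config, overwrite the one field we are willing to
    // accept a change to, and compare: if the two are now equal, the configs
    // differ only by `filesystem_pool`.
    let mut patched = existing.clone();
    patched.filesystem_pool = Some(new_pool.clone());
    patched == *incoming && new_pool == runtime_pool
}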
- let num_zones_updated = zones_to_update.len(); - for (existing_zone, new_zone) in zones_to_update { - *existing_zone = new_zone; - } - - info!( - log, - "ensure_all_omicron_zones: request reconciliation done"; - "num_zones_to_be_removed" => zones_to_be_removed.len(), - "num_zones_to_be_added" => zones_to_be_added.len(), - "num_zones_updated" => num_zones_updated, - ); - Ok(ReconciledNewZonesRequest { zones_to_be_removed, zones_to_be_added }) -} - -#[cfg(all(test, target_os = "illumos"))] -mod illumos_tests { - use crate::metrics; - - use super::*; - use illumos_utils::dladm::{ - BOOTSTRAP_ETHERSTUB_NAME, Etherstub, UNDERLAY_ETHERSTUB_NAME, - UNDERLAY_ETHERSTUB_VNIC_NAME, - }; - - use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use omicron_uuid_kinds::OmicronZoneUuid; - use sled_storage::manager_test_harness::StorageManagerTestHarness; - use std::{ - net::{Ipv6Addr, SocketAddrV6}, - time::Duration, - }; - use tokio::sync::mpsc::error::TryRecvError; - use uuid::Uuid; - - // Just placeholders. Not used. - const GLOBAL_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - - const EXPECTED_PORT: u16 = 12223; - - // Timeout within which we must have received a message about a zone's links - // to track. This is very generous. - const LINK_NOTIFICATION_TIMEOUT: Duration = Duration::from_secs(5); - - struct FakeSystemApi { - fake_install_dir: Utf8PathBuf, - dladm: Arc, - zones: Arc, - } - - impl FakeSystemApi { - fn new(fake_install_dir: Utf8PathBuf) -> Box { - Box::new(Self { - fake_install_dir, - dladm: illumos_utils::fakes::dladm::Dladm::new(), - zones: illumos_utils::fakes::zone::Zones::new(), - }) - } - } - - impl SystemApi for FakeSystemApi { - fn fake_install_dir(&self) -> Option<&Utf8Path> { - Some(&self.fake_install_dir) - } - - fn dladm(&self) -> Arc { - self.dladm.clone() - } - - fn zones(&self) -> Arc { - self.zones.clone() - } - } - - fn make_bootstrap_networking_config() -> BootstrapNetworking { - BootstrapNetworking { - bootstrap_etherstub: Etherstub( - BOOTSTRAP_ETHERSTUB_NAME.to_string(), - ), - global_zone_bootstrap_ip: GLOBAL_ZONE_BOOTSTRAP_IP, - global_zone_bootstrap_link_local_ip: GLOBAL_ZONE_BOOTSTRAP_IP, - switch_zone_bootstrap_ip: SWITCH_ZONE_BOOTSTRAP_IP, - underlay_etherstub: Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - underlay_etherstub_vnic: EtherstubVnic( - UNDERLAY_ETHERSTUB_VNIC_NAME.to_string(), - ), - } - } - - // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - ) { - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - try_new_service_of_type( - mgr, - id, - generation, - OmicronZoneType::InternalNtp { address }, - ) - .await - .expect("Could not create service"); - } - - async fn try_new_service_of_type( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - zone_type: OmicronZoneType, - ) -> Result<(), Error> { - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - zone_type, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }], - }) - .await - } - - // Prepare to call "ensure" for a service which already exists. We should - // return the service without actually installing a new zone. 
- async fn ensure_existing_service( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - ) { - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }], - }) - .await - .unwrap(); - } - - // Prepare to drop the service manager. - // - // This will shut down all allocated zones, and delete their - // associated VNICs. - async fn drop_service_manager(mgr: ServiceManager) { - // Also send a message to the metrics task that the VNIC has been - // deleted. - let queue = mgr.metrics_queue(); - for zone in mgr.inner.zones.lock().await.values() { - if let Err(e) = queue.untrack_zone_links(&zone.runtime) { - error!( - mgr.inner.log, - "failed to stop tracking zone datalinks"; - "errors" => ?e, - ); - } - } - - // Explicitly drop the service manager - drop(mgr); - } - - struct TestConfig { - config_dir: camino_tempfile::Utf8TempDir, - } - - impl TestConfig { - async fn new() -> Self { - let config_dir = camino_tempfile::Utf8TempDir::new().unwrap(); - Self { config_dir } - } - - fn make_config(&self) -> Config { - Config { - sled_identifiers: SledIdentifiers { - rack_id: Uuid::new_v4(), - sled_id: Uuid::new_v4(), - model: "fake-gimlet".to_string(), - revision: 1, - serial: "fake-serial".to_string(), - }, - sidecar_revision: SidecarRevision::Physical( - "rev_whatever_its_a_test".to_string(), - ), - } - } - - fn override_paths(&self, mgr: &ServiceManager) { - let dir = self.config_dir.path(); - mgr.override_ledger_directory(dir.to_path_buf()); - mgr.override_image_directory(dir.to_path_buf()); - - // We test launching "fake" versions of the zones, but the - // logic to find paths relies on checking the existence of - // files. 
- std::fs::write(dir.join("oximeter.tar.gz"), "Not a real file") - .unwrap(); - std::fs::write(dir.join("ntp.tar.gz"), "Not a real file").unwrap(); - } - } - - async fn setup_storage(log: &Logger) -> StorageManagerTestHarness { - let mut harness = StorageManagerTestHarness::new(&log).await; - let raw_disks = - harness.add_vdevs(&["u2_test.vdev", "m2_test.vdev"]).await; - harness.handle().key_manager_ready().await; - let config = harness.make_config(1, &raw_disks); - let result = harness - .handle() - .omicron_physical_disks_ensure(config.clone()) - .await - .expect("Failed to ensure disks"); - assert!(!result.has_error(), "{:?}", result); - harness - } - - struct LedgerTestHelper<'a> { - log: slog::Logger, - storage_test_harness: StorageManagerTestHarness, - zone_bundler: ZoneBundler, - test_config: &'a TestConfig, - } - - impl<'a> LedgerTestHelper<'a> { - async fn new(log: slog::Logger, test_config: &'a TestConfig) -> Self { - let storage_test_harness = setup_storage(&log).await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_test_harness.handle().clone(), - Default::default(), - ); - - LedgerTestHelper { - log, - storage_test_harness, - zone_bundler, - test_config, - } - } - - async fn cleanup(&mut self) { - self.storage_test_harness.cleanup().await; - } - - fn new_service_manager( - &self, - system: Box, - ) -> ServiceManager { - self.new_service_manager_with_timesync(TimeSyncConfig::Skip, system) - } - - fn new_service_manager_with_timesync( - &self, - time_sync_config: TimeSyncConfig, - system: Box, - ) -> ServiceManager { - let log = &self.log; - let reconciler = - DdmReconciler::new(Ipv6Subnet::new(Ipv6Addr::LOCALHOST), log) - .expect("created DdmReconciler"); - let mgr = ServiceManager::new_inner( - log, - reconciler, - make_bootstrap_networking_config(), - SledMode::Auto, - time_sync_config, - SidecarRevision::Physical("rev-test".to_string()), - vec![], - self.storage_test_harness.handle().clone(), - self.zone_bundler.clone(), - system, - ); - self.test_config.override_paths(&mgr); - mgr - } - - async fn sled_agent_started( - log: &slog::Logger, - test_config: &TestConfig, - mgr: &ServiceManager, - metrics_queue: MetricsRequestQueue, - ) { - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new( - 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - ), - ); - - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - metrics_queue, - ) - .await - .unwrap(); - } - } - - #[tokio::test] - async fn test_ensure_service() { - let logctx = - omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v1 = Generation::new(); - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // First check that we received the 
synced sled notification - let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!( - synced_message, - metrics::Message::TimeSynced { sled_id: mgr.sled_id() }, - ); - - // Then, check that we received a message about the zone's VNIC. - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - let zone_name = format!("oxz_ntp_{}", id); - assert_eq!( - vnic_message, - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into() - }, - ); - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_ensure_service_before_timesync() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_ensure_service_before_timesync", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - let mgr = helper.new_service_manager_with_timesync( - TimeSyncConfig::Fail, - FakeSystemApi::new(test_config.config_dir.path().to_path_buf()), - ); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v1 = Generation::new(); - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - - // Should fail: time has not yet synchronized. - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - let result = try_new_service_of_type( - &mgr, - id, - v2, - OmicronZoneType::Oximeter { address }, - ) - .await; - - // First, ensure this is the right kind of error. - let err = result.unwrap_err(); - let errors = match &err { - Error::ZoneEnsure { errors } => errors, - err => panic!("unexpected result: {err:?}"), - }; - assert_eq!(errors.len(), 1); - assert_matches::assert_matches!( - errors[0].1, - Error::TimeNotSynchronized - ); - - // Ensure we have _not_ received a message about the zone's VNIC, - // because there isn't a zone. - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - // Next, ensure this still converts to an "unavail" common error - let common_err = omicron_common::api::external::Error::from(err); - assert_matches::assert_matches!( - common_err, - omicron_common::api::external::Error::ServiceUnavailable { .. } - ); - - // Should succeed: we don't care that time has not yet synchronized (for - // this specific service). 
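The assertions above depend on the internal "time not synchronized" error converting into the external `ServiceUnavailable` variant, so callers see a retryable condition rather than a hard failure. Here is a small sketch of that conversion pattern with hypothetical error enums standing in for the real sled-agent and omicron_common types:

// Hypothetical error types; the real ones live in sled-agent and omicron_common.
#[derive(Debug)]
enum InternalError {
    TimeNotSynchronized,
    Other(String),
}

#[derive(Debug)]
enum ExternalError {
    ServiceUnavailable { internal_message: String },
    InternalServerError { internal_message: String },
}

impl From<InternalError> for ExternalError {
    fn from(err: InternalError) -> Self {
        match err {
            // Time sync is expected to resolve on its own, so surface it as a
            // retryable "unavailable" condition rather than a hard failure.
            InternalError::TimeNotSynchronized => ExternalError::ServiceUnavailable {
                internal_message: "sled time is not yet synchronized".to_string(),
            },
            InternalError::Other(msg) => {
                ExternalError::InternalServerError { internal_message: msg }
            }
        }
    }
}

fn main() {
    let external = ExternalError::from(InternalError::TimeNotSynchronized);
    assert!(matches!(external, ExternalError::ServiceUnavailable { .. }));
}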
- try_new_service_of_type( - &mgr, - id, - v2, - OmicronZoneType::InternalNtp { address }, - ) - .await - .unwrap(); - - drop_service_manager(mgr).await; - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_ensure_service_which_already_exists() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_ensure_service_which_already_exists", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v2 = Generation::new().next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - let v3 = v2.next(); - ensure_existing_service(&mgr, id, v3).await; - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v3); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // First, we will get a message about the sled being synced. - let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!( - synced_message, - metrics::Message::TimeSynced { sled_id: mgr.sled_id() } - ); - - // In this case, the manager creates the zone once, and then "ensuring" - // it a second time is a no-op. So we simply expect the same message - // sequence as starting a zone for the first time. - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - let zone_name = format!("oxz_ntp_{}", id); - assert_eq!( - vnic_message, - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into() - }, - ); - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_services_are_recreated_on_reboot() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_services_are_recreated_on_reboot", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - // First, spin up a ServiceManager, create a new zone, and then tear - // down the ServiceManager. - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v2 = Generation::new().next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - - let sled_id = mgr.sled_id(); - drop_service_manager(mgr).await; - - // First, we will get a message about the sled being synced. 
- let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!(synced_message, metrics::Message::TimeSynced { sled_id }); - - // Check that we received a message about the zone's VNIC. Since the - // manager is being dropped, it should also send a message about the - // VNIC being deleted. - let zone_name = format!("oxz_ntp_{}", id); - for expected_vnic_message in [ - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into(), - }, - metrics::Message::UntrackVnic { name: "oxControlService0".into() }, - ] { - println!( - "Expecting message from manager: {expected_vnic_message:#?}" - ); - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - assert_eq!(vnic_message, expected_vnic_message,); - } - // Note that the manager has been dropped, so we should get - // disconnected, not empty. - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Disconnected)); - - // Before we re-create the service manager - notably, using the same - // config file! - expect that a service gets initialized. - // TODO? - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // Note that the `omicron_zones_list()` request just returns the - // configured zones, stored in the on-disk ledger. There is nothing - // above that actually ensures that those zones exist, as far as I can - // tell! - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_services_do_not_persist_without_config() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_services_do_not_persist_without_config", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - // First, spin up a ServiceManager, create a new zone, and then tear - // down the ServiceManager. - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - let v1 = Generation::new(); - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - drop_service_manager(mgr).await; - - // Next, delete the ledger. This means the zone we just created will not - // be remembered on the next initialization. - std::fs::remove_file( - test_config.config_dir.path().join(ZONES_LEDGER_FILENAME), - ) - .unwrap(); - - // Observe that the old service is not re-initialized. 
- let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_bad_generations() { - // Start like the normal tests. - let logctx = - omicron_test_utils::dev::test_setup_log("test_bad_generations"); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - // Like the normal tests, set up a generation with one zone in it. - let v1 = Generation::new(); - let v2 = v1.next(); - let id1 = OmicronZoneUuid::new_v4(); - - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - let mut zones = vec![OmicronZoneConfig { - id: id1, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }]; - - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v2, - zones: zones.clone(), - }) - .await - .unwrap(); - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id1); - - // Make a new list of zones that we're going to try with a bunch of - // different generation numbers. - let id2 = OmicronZoneUuid::new_v4(); - zones.push(OmicronZoneConfig { - id: id2, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }); - - // Now try to apply that list with an older generation number. This - // shouldn't work and the reported state should be unchanged. - let error = mgr - .ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v1, - zones: zones.clone(), - }) - .await - .expect_err("unexpectedly went backwards in zones generation"); - assert!(matches!( - error, - Error::RequestedZoneConfigOutdated { requested, current } - if requested == v1 && current == v2 - )); - let found2 = mgr.omicron_zones_list().await; - assert_eq!(found, found2); - - // Now try to apply that list with the same generation number that we - // used before. This shouldn't work either. - let error = mgr - .ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v2, - zones: zones.clone(), - }) - .await - .expect_err("unexpectedly changed a single zone generation"); - assert!(matches!( - error, - Error::RequestedConfigConflicts(vr) if vr == v2 - )); - let found3 = mgr.omicron_zones_list().await; - assert_eq!(found, found3); - - // But we should be able to apply this new list of zones as long as we - // advance the generation number. 
- let v3 = v2.next(); - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v3, - zones: zones.clone(), - }) - .await - .expect("failed to remove all zones in a new generation"); - let found4 = mgr.omicron_zones_list().await; - assert_eq!(found4.generation, v3); - let mut our_zones = zones; - our_zones.sort_by(|a, b| a.id.cmp(&b.id)); - let mut found_zones = found4.zones; - found_zones.sort_by(|a, b| a.id.cmp(&b.id)); - assert_eq!(our_zones, found_zones); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } -} - #[cfg(test)] mod test { use super::*; - use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use omicron_uuid_kinds::ZpoolUuid; use sled_agent_types::zone_bundle::ZoneBundleMetadata; #[test] @@ -5998,282 +4379,4 @@ mod test { &serde_json::to_string_pretty(&schema).unwrap(), ); } - - #[test] - fn test_fix_7229_zone_config_reconciliation() { - fn make_omicron_zone_config( - filesystem_pool: Option<&ZpoolName>, - ) -> OmicronZoneConfig { - OmicronZoneConfig { - id: OmicronZoneUuid::new_v4(), - filesystem_pool: filesystem_pool.cloned(), - zone_type: OmicronZoneType::Oximeter { - address: "[::1]:0".parse().unwrap(), - }, - image_source: OmicronZoneImageSource::InstallDataset, - } - } - - let logctx = - omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let log = &logctx.log; - - let some_zpools = (0..10) - .map(|_| ZpoolName::new_external(ZpoolUuid::new_v4())) - .collect::>(); - - // Test 1: We have some zones; the new config makes no changes. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0].clone()), - ), - ( - make_omicron_zone_config(Some(&some_zpools[1])), - ZpoolOrRamdisk::Zpool(some_zpools[1].clone()), - ), - ( - make_omicron_zone_config(Some(&some_zpools[2])), - ZpoolOrRamdisk::Zpool(some_zpools[2].clone()), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing.iter().map(|(zone, _)| zone.clone()).collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!(reconciled.zones_to_be_removed, HashSet::new()); - assert_eq!(reconciled.zones_to_be_added, HashSet::new()); - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - new_request.zones, - ); - } - - // Test 2: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix). 
- { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2].clone()), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - zone.filesystem_pool = Some(some_zpools[i].clone()); - zone - }) - .collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!(reconciled.zones_to_be_removed, HashSet::new()); - assert_eq!(reconciled.zones_to_be_added, HashSet::new()); - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - new_request.zones, - ); - } - - // Test 3: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix) but also changes - // something else in the config for the final zone; we should attempt to - // remove and re-add that final zone. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2].clone()), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - zone.filesystem_pool = Some(some_zpools[i].clone()); - if i == 2 { - zone.zone_type = OmicronZoneType::Oximeter { - address: "[::1]:10000".parse().unwrap(), - }; - } - zone - }) - .collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!( - reconciled.zones_to_be_removed, - HashSet::from([existing[2].0.clone()]), - ); - assert_eq!( - reconciled.zones_to_be_added, - HashSet::from([new_request.zones[2].clone()]), - ); - // The first two existing zones should have been updated to match - // the new request. - assert_eq!( - Vec::from_iter(existing[..2].iter().map(|(z, _)| z.clone())), - &new_request.zones[..2], - ); - } - - // Test 4: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix), except the new pool - // on the final zone is incorrect. We should get an error back. 
- { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2].clone()), - ), - ]; - let existing_orig = - existing.iter().map(|(z, _)| z.clone()).collect::>(); - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - if i < 2 { - zone.filesystem_pool = Some(some_zpools[i].clone()); - } else { - zone.filesystem_pool = Some(some_zpools[4].clone()); - } - zone - }) - .collect(), - }; - let err = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect_err("should not have reconciled successfully"); - - match err { - Error::InvalidFilesystemPoolZoneConfig { - zone_id, - expected_pool, - got_pool, - } => { - assert_eq!(zone_id, existing[2].0.id); - assert_eq!(expected_pool, some_zpools[2]); - assert_eq!(got_pool, some_zpools[4]); - } - _ => panic!("unexpected error: {err}"), - } - // reconciliation failed, so the contents of our existing configs - // should not have changed (even though a couple of the changes - // were okay, we should either take all or none to maintain - // consistency with the generation-tagged OmicronZonesConfig) - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - existing_orig, - ); - } - - // Test 5: We have some zones. The new config applies #7229 fix to the - // first zone, doesn't include the remaining zones, and adds some new - // zones. We should see "the remaining zones" removed, the "new zones" - // added, and the 7229-fixed zone not in either set. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1].clone()), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2].clone()), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - { - let mut z = existing[0].0.clone(); - z.filesystem_pool = Some(some_zpools[0].clone()); - z - }, - make_omicron_zone_config(None), - make_omicron_zone_config(None), - ], - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - - assert_eq!( - reconciled.zones_to_be_removed, - HashSet::from_iter( - existing[1..].iter().map(|(z, _)| z.clone()) - ), - ); - assert_eq!( - reconciled.zones_to_be_added, - HashSet::from_iter(new_request.zones[1..].iter().cloned()), - ); - // Only the first existing zone is being kept; ensure it matches the - // new request. 
- assert_eq!(existing[0].0, new_request.zones[0]); - } - logctx.cleanup_successful(); - } } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 57ea9491484..0c7965ca231 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -25,7 +25,6 @@ use dropshot::TypedBody; use dropshot::endpoint; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::OmicronSledConfig; -use nexus_sled_agent_shared::inventory::OmicronSledConfigResult; use nexus_sled_agent_shared::inventory::SledRole; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledVmmState; @@ -35,8 +34,6 @@ use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; -use omicron_common::disk::DatasetsConfig; -use omicron_common::disk::OmicronPhysicalDisksConfig; use range_requests::RequestContextEx; use sled_agent_api::*; use sled_agent_types::boot_disk::BootDiskOsWriteStatus; @@ -339,28 +336,14 @@ impl SledAgentApi for SledAgentSimImpl { )) } - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list()?)) - } - - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list()?)) - } - async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - let result = sa.set_omicron_config(body_args)?; - Ok(HttpResponseOk(result)) + sa.set_omicron_config(body_args)?; + Ok(HttpResponseUpdatedNoContent()) } async fn sled_add( @@ -633,12 +616,6 @@ impl SledAgentApi for SledAgentSimImpl { method_unimplemented() } - async fn zpools_get( - _rqctx: RequestContext, - ) -> Result>, HttpError> { - method_unimplemented() - } - async fn sled_role_get( _rqctx: RequestContext, ) -> Result, HttpError> { diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 032c7205729..65ef6102082 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -25,7 +25,7 @@ use dropshot::HttpError; use futures::Stream; use nexus_sled_agent_shared::inventory::{ Inventory, InventoryDataset, InventoryDisk, InventoryZpool, - OmicronSledConfig, OmicronSledConfigResult, OmicronZonesConfig, SledRole, + OmicronSledConfig, OmicronZonesConfig, SledRole, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -896,7 +896,9 @@ impl SledAgent { pub fn set_omicron_config( &self, config: OmicronSledConfig, - ) -> Result { + ) -> Result<(), HttpError> { + // TODO Update the simulator to work on `OmicronSledConfig` instead of + // the three separate legacy configs let disks_config = OmicronPhysicalDisksConfig { generation: config.generation, disks: config.disks.into_iter().collect(), @@ -909,16 +911,14 @@ impl SledAgent { generation: config.generation, zones: config.zones.into_iter().collect(), }; - let (disks, datasets) = { - let mut storage = self.storage.lock(); - let DisksManagementResult { status: disks } = - storage.omicron_physical_disks_ensure(disks_config)?; - let DatasetsManagementResult { status: datasets } = - storage.datasets_ensure(datasets_config)?; - (disks, datasets) - }; + + let mut 
storage = self.storage.lock(); + let _ = storage.omicron_physical_disks_ensure(disks_config)?; + let _ = storage.datasets_ensure(datasets_config)?; *self.fake_zones.lock().unwrap() = zones_config; - Ok(OmicronSledConfigResult { disks, datasets }) + //*self.sled_config.lock().unwrap() = Some(config); + + Ok(()) } pub fn omicron_zones_list(&self) -> OmicronZonesConfig { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e1976c2b29a..e378e89261c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -4,26 +4,24 @@ //! Sled agent implementation -use crate::artifact_store::ArtifactStore; +use crate::artifact_store::{self, ArtifactStore}; use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::early_networking::EarlyNetworkSetupError; use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::long_running_tasks::LongRunningTaskHandles; -use crate::metrics::MetricsManager; +use crate::metrics::{self, MetricsManager, MetricsRequestQueue}; use crate::nexus::{ NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; -use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; -use crate::storage_monitor::StorageMonitorHandle; use crate::support_bundle::logs::SupportBundleLogs; use crate::support_bundle::storage::SupportBundleManager; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; -use crate::zone_bundle; use crate::zone_bundle::BundleError; +use crate::zone_bundle::{self, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use derive_more::From; @@ -31,14 +29,18 @@ use dropshot::HttpError; use futures::StreamExt; use futures::stream::FuturesUnordered; use illumos_utils::opte::PortManager; +use illumos_utils::running_zone::RunningZone; +use illumos_utils::zpool::ZpoolName; use nexus_sled_agent_shared::inventory::{ - Inventory, InventoryDataset, InventoryDisk, InventoryZpool, - OmicronSledConfig, OmicronSledConfigResult, OmicronZonesConfig, SledRole, + Inventory, OmicronSledConfig, OmicronZoneConfig, OmicronZonesConfig, + SledRole, }; use omicron_common::address::{ Ipv6Subnet, SLED_PREFIX, get_sled_address, get_switch_zone_address, }; -use omicron_common::api::external::{ByteCount, ByteCountRangeError, Vni}; +use omicron_common::api::external::{ + ByteCount, ByteCountRangeError, Generation, Vni, +}; use omicron_common::api::internal::nexus::{DiskRuntimeState, SledVmmState}; use omicron_common::api::internal::shared::{ ExternalIpGatewayMap, HostPortConfig, RackNetworkConfig, @@ -48,13 +50,14 @@ use omicron_common::api::internal::shared::{ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; -use omicron_common::disk::{ - DatasetsConfig, DatasetsManagementResult, DisksManagementResult, - OmicronPhysicalDisksConfig, -}; +use omicron_common::disk::M2Slot; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; -use sled_agent_api::Zpool; +use sled_agent_config_reconciler::{ + ConfigReconcilerHandle, ConfigReconcilerSpawnToken, InternalDisksReceiver, + LedgerNewConfigError, LedgerTaskError, ReconcilerInventory, + SledAgentArtifactStore, SledAgentFacilities, TimeSyncStatus, +}; use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::instance::{ 
@@ -65,18 +68,21 @@ use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; use sled_agent_types::time_sync::TimeSync; use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, - PriorityOrder, StorageLimit, ZoneBundleMetadata, + PriorityOrder, StorageLimit, ZoneBundleCause, ZoneBundleMetadata, }; use sled_diagnostics::{SledDiagnosticsCmdError, SledDiagnosticsCmdOutput}; -use sled_hardware::{HardwareManager, MemoryReservations, underlay}; +use sled_hardware::{ + HardwareManager, MemoryReservations, PooledDiskError, underlay, +}; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; -use sled_storage::manager::StorageHandle; +use sled_storage::config::MountConfig; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; +use tufaceous_artifact::ArtifactHash; use uuid::Uuid; use illumos_utils::dladm::Dladm; @@ -112,9 +118,6 @@ pub enum Error { #[error("Failed to operate on underlay device: {0}")] Underlay(#[from] underlay::Error), - #[error("Failed to request firewall rules")] - FirewallRequest(#[source] nexus_client::Error), - #[error(transparent)] Services(#[from] crate::services::Error), @@ -127,9 +130,6 @@ pub enum Error { #[error("Error managing storage: {0}")] Storage(#[from] sled_storage::error::Error), - #[error("Error monitoring storage: {0}")] - StorageMonitor(#[from] crate::storage_monitor::Error), - #[error("Error updating: {0}")] Download(#[from] crate::updates::Error), @@ -168,6 +168,9 @@ pub enum Error { #[error(transparent)] RepoDepotStart(#[from] crate::artifact_store::StartError), + + #[error("Time not yet synchronized")] + TimeNotSynchronized, } impl From for omicron_common::api::external::Error { @@ -330,12 +333,8 @@ struct SledAgentInner { // This is used for idempotence checks during RSS/Add-Sled internal APIs start_request: StartSledAgentRequest, - // Component of Sled Agent responsible for storage and dataset management. - storage: StorageHandle, - - // Component of Sled Agent responsible for monitoring storage and updating - // dump devices. - storage_monitor: StorageMonitorHandle, + // Handle to the sled-agent-config-reconciler system. + config_reconciler: Arc, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, @@ -349,9 +348,6 @@ struct SledAgentInner { // Other Oxide-controlled services running on this Sled. services: ServiceManager, - // Connection to Nexus. - nexus_client: NexusClient, - // A mechanism for notifiying nexus about sled-agent updates nexus_notifier: NexusNotifierHandle, @@ -374,7 +370,7 @@ struct SledAgentInner { probes: ProbeManager, // Component of Sled Agent responsible for managing the artifact store. - repo_depot: dropshot::HttpServer>, + repo_depot: dropshot::HttpServer>>, } impl SledAgentInner { @@ -403,6 +399,7 @@ impl SledAgent { request: StartSledAgentRequest, services: ServiceManager, long_running_task_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. 
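The comment above refers to the slog convention used throughout this file: subcomponents derive child loggers from `parent_log` and tag themselves with a `component` key (as the `PortManager` and `StorageMonitor` loggers do elsewhere in this diff). A minimal sketch of that pattern, using `slog::Discard` as a placeholder drain:

use slog::{Discard, Logger, info, o};

fn main() {
    // Root ("parent") logger; a real agent would use a terminal or file drain.
    let parent_log = Logger::root(Discard, o!("name" => "sled-agent"));

    // Each subcomponent makes a child logger that inherits the parent's
    // key/value pairs and adds its own "component" tag.
    let port_manager_log = parent_log.new(o!("component" => "PortManager"));
    info!(port_manager_log, "child loggers carry both the parent and component keys");
}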
@@ -421,12 +418,14 @@ impl SledAgent { ) .cleanup_snapshots(); - let storage_manager = &long_running_task_handles.storage_manager; - let boot_disk = storage_manager - .get_latest_disks() - .await - .boot_disk() - .ok_or_else(|| Error::BootDiskNotFound)?; + let config_reconciler = + Arc::clone(&long_running_task_handles.config_reconciler); + let boot_disk_zpool = config_reconciler + .internal_disks_rx() + .current() + .boot_disk_zpool() + .ok_or_else(|| Error::BootDiskNotFound)? + .clone(); // Configure a swap device of the configured size before other system setup. match config.swap_device_size_gb { @@ -434,7 +433,7 @@ impl SledAgent { info!(log, "Requested swap device of size {} GiB", sz); crate::swap_device::ensure_swap_device( &parent_log, - &boot_disk.1, + &boot_disk_zpool, sz, )?; } @@ -447,7 +446,7 @@ impl SledAgent { } info!(log, "Mounting backing filesystems"); - crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1)?; + crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk_zpool)?; // TODO-correctness Bootstrap-agent already ensures the underlay // etherstub and etherstub VNIC exist on startup - could it pass them @@ -537,7 +536,8 @@ impl SledAgent { nexus_client.clone(), instance_vnic_allocator, port_manager.clone(), - storage_manager.clone(), + config_reconciler.currently_managed_zpools_rx().clone(), + config_reconciler.available_datasets_rx(), long_running_task_handles.zone_bundler.clone(), vmm_reservoir_manager.clone(), metrics_manager.request_queue(), @@ -586,9 +586,30 @@ impl SledAgent { .await .expect( "Expected an infinite retry loop getting \ - network config from bootstore", + network config from bootstore", ); + let artifact_store = Arc::new( + ArtifactStore::new( + &log, + config_reconciler.internal_disks_rx().clone(), + Some(Arc::clone(&config_reconciler)), + ) + .await, + ); + + // Start reconciling against our ledgered sled config. + config_reconciler.spawn_reconciliation_task( + etherstub_vnic, + ReconcilerFacilities { + service_manager: services.clone(), + metrics_queue: metrics_manager.request_queue(), + zone_bundler: long_running_task_handles.zone_bundler.clone(), + }, + SledAgentArtifactStoreWrapper(Arc::clone(&artifact_store)), + config_reconciler_spawn_token, + ); + services .sled_agent_started( svc_config, @@ -600,14 +621,8 @@ impl SledAgent { ) .await?; - let repo_depot = ArtifactStore::new( - &log, - storage_manager.clone(), - Some(services.clone()), - ) - .await - .start(sled_address, &config.dropshot) - .await?; + let repo_depot = + artifact_store.start(sled_address, &config.dropshot).await?; // Spawn a background task for managing notifications to nexus // about this sled-agent. 
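Several handles used above (`internal_disks_rx()`, `currently_managed_zpools_rx()`) follow a "receiver of the latest snapshot" shape, where `current()` or a borrow yields the most recent value without blocking the producer. The underlying `sled-agent-config-reconciler` types aren't shown in this diff, so the following is only a generic sketch of that pattern built on `tokio::sync::watch` with a made-up `InternalDisks` type, not the real `InternalDisksReceiver` API:

use tokio::sync::watch;

// Stand-in for the real internal-disks snapshot type.
#[derive(Clone, Debug)]
struct InternalDisks {
    boot_disk_zpool: Option<String>,
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = watch::channel(InternalDisks { boot_disk_zpool: None });

    // A disk-management task publishes a new snapshot whenever disks change.
    tx.send(InternalDisks { boot_disk_zpool: Some("oxp_0".to_string()) })
        .expect("receiver still alive");

    // Consumers either grab a point-in-time copy of the latest value...
    let current = rx.borrow_and_update().clone();
    println!("boot disk zpool: {:?}", current.boot_disk_zpool);

    // ...or await the next change.
    tx.send(InternalDisks { boot_disk_zpool: None }).expect("receiver still alive");
    rx.changed().await.expect("sender still alive");
    println!("disks changed: {:?}", *rx.borrow());
}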
@@ -630,27 +645,26 @@ impl SledAgent { request.body.id.into_untyped_uuid(), nexus_client.clone(), etherstub.clone(), - storage_manager.clone(), port_manager.clone(), metrics_manager.request_queue(), + config_reconciler.available_datasets_rx(), log.new(o!("component" => "ProbeManager")), ); + let currently_managed_zpools_rx = + config_reconciler.currently_managed_zpools_rx().clone(); + let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.body.id, subnet: request.body.subnet, start_request: request, - storage: long_running_task_handles.storage_manager.clone(), - storage_monitor: long_running_task_handles - .storage_monitor_handle - .clone(), + config_reconciler, instances, probes, hardware: long_running_task_handles.hardware_manager.clone(), port_manager, services, - nexus_client, nexus_notifier: nexus_notifier_handle, rack_network_config, zone_bundler: long_running_task_handles.zone_bundler.clone(), @@ -663,7 +677,7 @@ impl SledAgent { sprockets: config.sprockets.clone(), }; - sled_agent.inner.probes.run().await; + sled_agent.inner.probes.run(currently_managed_zpools_rx).await; // We immediately add a notification to the request queue about our // existence. If inspection of the hardware later informs us that we're @@ -674,66 +688,17 @@ impl SledAgent { Ok(sled_agent) } - /// Load services for which we're responsible. - /// - /// Blocks until all services have started, retrying indefinitely on - /// failure. - pub(crate) async fn load_services(&self) { - info!(self.log, "Loading cold boot services"); - retry_notify( - retry_policy_internal_service_aggressive(), - || async { - // Load as many services as we can, and don't exit immediately - // upon failure. - let load_services_result = - self.inner.services.load_services().await.map_err(|err| { - BackoffError::transient(Error::from(err)) - }); - - // If there wasn't any work to do, we're done immediately. - if matches!( - load_services_result, - Ok(services::LoadServicesResult::NoServicesToLoad) - ) { - info!( - self.log, - "load_services exiting early; no services to be loaded" - ); - return Ok(()); - } - - // Otherwise, request firewall rule updates for as many services as - // we can. Note that we still make this request even if we only - // partially load some services. - let firewall_result = self - .request_firewall_update() - .await - .map_err(|err| BackoffError::transient(err)); - - // Only complete if we have loaded all services and firewall - // rules successfully. - load_services_result.and(firewall_result) - }, - |err, delay| { - warn!( - self.log, - "Failed to load services, will retry in {:?}", delay; - "error" => ?err, - ); - }, - ) - .await - .unwrap(); // we retry forever, so this can't fail - } - /// Accesses the [SupportBundleManager] API. pub(crate) fn as_support_bundle_storage(&self) -> SupportBundleManager<'_> { - SupportBundleManager::new(&self.log, self.storage()) + SupportBundleManager::new(&self.log, &*self.inner.config_reconciler) } /// Accesses the [SupportBundleLogs] API. pub(crate) fn as_support_bundle_logs(&self) -> SupportBundleLogs<'_> { - SupportBundleLogs::new(&self.log, self.storage()) + SupportBundleLogs::new( + &self.log, + self.inner.config_reconciler.internal_disks_rx(), + ) } pub(crate) fn switch_zone_underlay_info( @@ -758,20 +723,6 @@ impl SledAgent { self.sprockets.clone() } - /// Requests firewall rules from Nexus. - /// - /// Does not retry upon failure. 
- async fn request_firewall_update(&self) -> Result<(), Error> { - let sled_id = self.inner.id; - - self.inner - .nexus_client - .sled_firewall_rules_request(&sled_id) - .await - .map_err(|err| Error::FirewallRequest(err))?; - Ok(()) - } - /// Trigger a request to Nexus informing it that the current sled exists, /// with information about the existing set of hardware. pub(crate) async fn notify_nexus_about_self(&self, log: &Logger) { @@ -862,212 +813,13 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } - pub async fn datasets_config_list(&self) -> Result { - Ok(self.storage().datasets_config_list().await?) - } - - async fn datasets_ensure( - &self, - config: DatasetsConfig, - ) -> Result { - info!(self.log, "datasets ensure"); - let datasets_result = self.storage().datasets_ensure(config).await?; - info!(self.log, "datasets ensure: Updated storage"); - - // TODO(https://github.com/oxidecomputer/omicron/issues/6177): - // At the moment, we don't actually remove any datasets -- this function - // just adds new datasets. - // - // Once we start removing old datasets, we should probably ensure that - // they are not longer in-use before returning (similar to - // omicron_physical_disks_ensure). - - Ok(datasets_result) - } - - /// Requests the set of physical disks currently managed by the Sled Agent. - /// - /// This should be contrasted by the set of disks in the inventory, which - /// may contain a slightly different set, if certain disks are not expected - /// to be in-use by the broader control plane. - pub async fn omicron_physical_disks_list( - &self, - ) -> Result { - Ok(self.storage().omicron_physical_disks_list().await?) - } - - /// Ensures that the specific set of Omicron Physical Disks are running - /// on this sled, and that no other disks are being used by the control - /// plane (with the exception of M.2s, which are always automatically - /// in-use). - async fn omicron_physical_disks_ensure( - &self, - config: OmicronPhysicalDisksConfig, - ) -> Result { - info!(self.log, "physical disks ensure"); - // Tell the storage subsystem which disks should be managed. - let disk_result = - self.storage().omicron_physical_disks_ensure(config).await?; - info!(self.log, "physical disks ensure: Updated storage"); - - // Grab a view of the latest set of disks, alongside a generation - // number. - // - // This generation is at LEAST as high as our last call through - // omicron_physical_disks_ensure. It may actually be higher, if a - // concurrent operation occurred. - // - // "latest_disks" has a generation number, which is important for other - // subcomponents of Sled Agent to consider. If multiple requests to - // ensure disks arrive concurrently, it's important to "only advance - // forward" as requested by Nexus. - // - // For example: if we receive the following requests concurrently: - // - Use Disks {A, B, C}, generation = 1 - // - Use Disks {A, B, C, D}, generation = 2 - // - // If we ignore generation numbers, it's possible that we start using - // "disk D" -- e.g., for instance filesystems -- and then immediately - // delete it when we process the request with "generation 1". - // - // By keeping these requests ordered, we prevent this thrashing, and - // ensure that we always progress towards the last-requested state. 
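The ordering rule explained above (requests carry a generation number and the sled only moves forward, never back) is the same rule the `test_bad_generations` test exercises for zones: an older generation is rejected outright, and reusing the current generation with different contents is also an error. Below is a minimal sketch of just the monotonic part, with a plain `u64` standing in for `omicron_common`'s `Generation`:

// Sketch only: a plain u64 plays the role of Generation here.
struct GenerationGate {
    current: u64,
}

#[derive(Debug, PartialEq)]
enum GateError {
    Outdated { requested: u64, current: u64 },
}

impl GenerationGate {
    /// Accept `requested` only if it does not move backwards; a late-arriving
    /// older request can no longer roll the sled back to a previous disk set.
    fn try_advance(&mut self, requested: u64) -> Result<(), GateError> {
        if requested < self.current {
            return Err(GateError::Outdated { requested, current: self.current });
        }
        self.current = requested;
        Ok(())
    }
}

fn main() {
    let mut gate = GenerationGate { current: 1 };
    assert!(gate.try_advance(2).is_ok()); // "use disks {A, B, C, D}", generation 2
    // The concurrent "use disks {A, B, C}", generation 1 request arrives late
    // and is rejected instead of deleting disk D out from under its users.
    assert_eq!(
        gate.try_advance(1),
        Err(GateError::Outdated { requested: 1, current: 2 })
    );
}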
- let latest_disks = self.storage().get_latest_disks().await; - let our_gen = latest_disks.generation(); - info!(self.log, "physical disks ensure: Propagating new generation of disks"; "generation" => ?our_gen); - - // Ensure that the StorageMonitor, and the dump devices, have committed - // to start using new disks and stop using old ones. - self.inner.storage_monitor.await_generation(*our_gen).await?; - info!(self.log, "physical disks ensure: Updated storage monitor"); - - // Ensure that the ZoneBundler, if it was creating a bundle referencing - // the old U.2s, has stopped using them. - self.inner.zone_bundler.await_completion_of_prior_bundles().await; - info!(self.log, "physical disks ensure: Updated zone bundler"); - - // Ensure that all probes, at least after our call to - // "omicron_physical_disks_ensure", stop using any disks that - // may have been in-service from before that request. - self.inner.probes.use_only_these_disks(&latest_disks).await; - info!(self.log, "physical disks ensure: Updated probes"); - - // Do the same for instances - mark them failed if they were using - // expunged disks. - self.inner.instances.use_only_these_disks(latest_disks).await?; - info!(self.log, "physical disks ensure: Updated instances"); - - Ok(disk_result) - } - /// Ensures that the specific sets of disks, datasets, and zones specified /// by `config` are running. - /// - /// This method currently blocks while each of disks, datasets, and zones - /// are ensured in that order; a failure on one prevents any attempt to - /// ensure the subsequent step(s). pub async fn set_omicron_config( &self, config: OmicronSledConfig, - ) -> Result { - // Until the config-reconciler work lands: unpack the unified config - // into the three split configs for indepenedent ledgering. - let disks_config = OmicronPhysicalDisksConfig { - generation: config.generation, - disks: config.disks.into_iter().collect(), - }; - - let disks = self.omicron_physical_disks_ensure(disks_config).await?; - - // If we only had partial success deploying disks, don't proceed. - if disks.has_error() { - return Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: Vec::new(), - }); - } - - let datasets_config = DatasetsConfig { - generation: config.generation, - datasets: config.datasets.into_iter().map(|d| (d.id, d)).collect(), - }; - - let datasets = self.datasets_ensure(datasets_config).await?; - - // If we only had partial success deploying datasets, don't proceed. - if datasets.has_error() { - return Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: datasets.status, - }); - } - - let zones_config = OmicronZonesConfig { - generation: config.generation, - zones: config.zones.into_iter().collect(), - }; - - self.omicron_zones_ensure(zones_config).await?; - - Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: datasets.status, - }) - } - - /// Ensures that the specific set of Omicron zones are running as configured - /// (and that no other zones are running) - async fn omicron_zones_ensure( - &self, - requested_zones: OmicronZonesConfig, - ) -> Result<(), Error> { - // TODO(https://github.com/oxidecomputer/omicron/issues/6043): - // - If these are the set of filesystems, we should also consider - // removing the ones which are not listed here. - // - It's probably worth sending a bulk request to the storage system, - // rather than requesting individual datasets. 
- for zone in &requested_zones.zones { - let Some(dataset_name) = zone.dataset_name() else { - continue; - }; - - // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 - // - // However, we need to ensure that all blueprints have datasets - // within them before we can remove this back-fill. - // - // Therefore, we do something hairy here: We ensure the filesystem - // exists, but don't specify any dataset UUID value. - // - // This means that: - // - If the dataset exists and has a UUID, this will be a no-op - // - If the dataset doesn't exist, it'll be created without its - // oxide:uuid zfs property set - // - If a subsequent call to "datasets_ensure" tries to set a UUID, - // it should be able to get set (once). - self.inner.storage.upsert_filesystem(None, dataset_name).await?; - } - - self.inner - .services - .ensure_all_omicron_zones_persistent(requested_zones) - .await?; - Ok(()) - } - - /// Gets the sled's current list of all zpools. - pub async fn zpools_get(&self) -> Vec { - self.inner - .storage - .get_latest_disks() - .await - .get_all_zpools() - .into_iter() - .map(|(name, variant)| Zpool { - id: name.id(), - disk_type: variant.into(), - }) - .collect() + ) -> Result, LedgerTaskError> { + self.inner.config_reconciler.set_sled_config(config).await } /// Returns whether or not the sled believes itself to be a scrimlet @@ -1180,7 +932,7 @@ impl SledAgent { todo!("Disk attachment not yet implemented"); } - pub fn artifact_store(&self) -> &ArtifactStore { + pub fn artifact_store(&self) -> &ArtifactStore { &self.inner.repo_depot.app_private() } @@ -1237,7 +989,18 @@ impl SledAgent { /// Gets the sled's current time synchronization state pub async fn timesync_get(&self) -> Result { - self.inner.services.timesync_get().await.map_err(Error::from) + let status = self.inner.config_reconciler.timesync_status(); + + // TODO-cleanup we could give a more specific error cause in the + // `FailedToGetSyncStatus` case. 
+ match status { + TimeSyncStatus::NotYetChecked + | TimeSyncStatus::ConfiguredToSkip + | TimeSyncStatus::FailedToGetSyncStatus(_) => { + Err(Error::TimeNotSynchronized) + } + TimeSyncStatus::TimeSync(time_sync) => Ok(time_sync), + } } pub async fn ensure_scrimlet_host_ports( @@ -1302,8 +1065,15 @@ impl SledAgent { Ok(()) } - pub(crate) fn storage(&self) -> &StorageHandle { - &self.inner.storage + pub(crate) fn boot_image_raw_devfs_path( + &self, + slot: M2Slot, + ) -> Option>> { + self.inner + .config_reconciler + .internal_disks_rx() + .current() + .image_raw_devfs_path(slot) } pub(crate) fn boot_disk_os_writer(&self) -> &BootDiskOsWriter { @@ -1347,64 +1117,30 @@ impl SledAgent { let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; - let mut disks = vec![]; - let mut zpools = vec![]; - let mut datasets = vec![]; - let (all_disks, omicron_zones) = tokio::join!( - self.storage().get_latest_disks(), - self.inner.services.omicron_zones_list() - ); - for (identity, variant, slot, firmware) in all_disks.iter_all() { - disks.push(InventoryDisk { - identity: identity.clone(), - variant, - slot, - active_firmware_slot: firmware.active_slot(), - next_active_firmware_slot: firmware.next_active_slot(), - number_of_firmware_slots: firmware.number_of_slots(), - slot1_is_read_only: firmware.slot1_read_only(), - slot_firmware_versions: firmware.slots().to_vec(), - }); - } - for zpool in all_disks.all_u2_zpools() { - let info = - match illumos_utils::zpool::Zpool::get_info(&zpool.to_string()) - { - Ok(info) => info, - Err(err) => { - warn!( - self.log, - "Failed to access zpool info"; - "zpool" => %zpool, - "err" => %err - ); - continue; - } - }; - - zpools.push(InventoryZpool { - id: zpool.id(), - total_size: ByteCount::try_from(info.size())?, + let ReconcilerInventory { + disks, + zpools, + datasets, + ledgered_sled_config, + } = self.inner.config_reconciler.inventory(); + + // TODO this inventory needs to be more robust for the reconciler; + // this is nontrivial so coming in a subsequent PR. For now just + // backfill the existing inventory structure. 
+ let omicron_physical_disks_generation = ledgered_sled_config + .as_ref() + .map(|sled_config| sled_config.generation) + .unwrap_or_else(Generation::new); + let omicron_zones = ledgered_sled_config + .map(|sled_config| OmicronZonesConfig { + generation: sled_config.generation, + zones: sled_config.zones.into_iter().collect(), + }) + .unwrap_or_else(|| OmicronZonesConfig { + generation: Generation::new(), + zones: Vec::new(), }); - let inv_props = - match self.storage().datasets_list(zpool.clone()).await { - Ok(props) => props - .into_iter() - .map(|prop| InventoryDataset::from(prop)), - Err(err) => { - warn!( - self.log, - "Failed to access dataset info within zpool"; - "zpool" => %zpool, - "err" => %err - ); - continue; - } - }; - datasets.extend(inv_props); - } - Ok(Inventory { sled_id, sled_agent_address, @@ -1417,7 +1153,7 @@ impl SledAgent { disks, zpools, datasets, - omicron_physical_disks_generation: *all_disks.generation(), + omicron_physical_disks_generation, }) } @@ -1573,3 +1309,77 @@ pub async fn sled_add( info!(log, "Peer agent initialized"; "peer_bootstrap_addr" => %bootstrap_addr, "peer_id" => %baseboard); Ok(()) } + +struct ReconcilerFacilities { + service_manager: ServiceManager, + metrics_queue: MetricsRequestQueue, + zone_bundler: ZoneBundler, +} + +impl SledAgentFacilities for ReconcilerFacilities { + type StartZoneError = services::Error; + type MetricsUntrackZoneLinksError = Vec; + type ZoneBundleCreateError = BundleError; + + async fn on_time_sync(&self) { + self.service_manager.on_time_sync().await + } + + async fn start_omicron_zone( + &self, + zone_config: &OmicronZoneConfig, + mount_config: &MountConfig, + is_time_synchronized: bool, + all_u2_pools: &[ZpoolName], + ) -> Result { + self.service_manager + .start_omicron_zone( + mount_config, + zone_config, + is_time_synchronized, + all_u2_pools, + ) + .await + } + + fn metrics_untrack_zone_links( + &self, + zone: &RunningZone, + ) -> Result<(), Self::MetricsUntrackZoneLinksError> { + self.metrics_queue.untrack_zone_links(zone) + } + + fn ddm_add_internal_dns_prefix(&self, prefix: Ipv6Subnet) { + self.service_manager.ddm_reconciler().add_internal_dns_subnet(prefix); + } + + fn ddm_remove_internal_dns_prefix(&self, prefix: Ipv6Subnet) { + self.service_manager + .ddm_reconciler() + .remove_internal_dns_subnet(prefix); + } + + async fn zone_bundle_create( + &self, + zone: &RunningZone, + cause: ZoneBundleCause, + ) -> Result<(), Self::ZoneBundleCreateError> { + self.zone_bundler.create(zone, cause).await?; + Ok(()) + } +} + +// Workaround wrapper for orphan rules. +struct SledAgentArtifactStoreWrapper(Arc>); + +impl SledAgentArtifactStore for SledAgentArtifactStoreWrapper { + type ArtifactExistsValidationError = artifact_store::Error; + + async fn validate_artifact_exists_in_storage( + &self, + artifact: ArtifactHash, + ) -> Result<(), Self::ArtifactExistsValidationError> { + self.0.get(artifact).await?; + Ok(()) + } +} diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs deleted file mode 100644 index 626d81d54ff..00000000000 --- a/sled-agent/src/storage_monitor.rs +++ /dev/null @@ -1,116 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! A task that listens for storage events from [`sled_storage::manager::StorageManager`] -//! and dispatches them to other parts of the bootstrap agent and sled agent -//! code. 
- -use omicron_common::api::external::Generation; -use sled_agent_config_reconciler::dump_setup::DumpSetup; -use sled_storage::config::MountConfig; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; -use slog::Logger; -use std::sync::Arc; -use tokio::sync::watch; - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Storage Monitor no longer running")] - NotRunning, -} - -pub struct StorageMonitor { - log: Logger, - storage_manager: StorageHandle, - - // Invokes dumpadm(8) and savecore(8) when new disks are encountered - dump_setup: DumpSetup, - - tx: watch::Sender, -} - -/// Emits status about storage monitoring. -#[derive(Debug, Clone)] -pub struct StorageMonitorStatus { - /// The latest generation of physical disks to be processed - /// by the storage monitor. - pub latest_gen: Option, -} - -impl StorageMonitorStatus { - fn new() -> Self { - Self { latest_gen: None } - } -} - -#[derive(Clone)] -pub struct StorageMonitorHandle { - rx: watch::Receiver, -} - -impl StorageMonitorHandle { - pub async fn await_generation( - &self, - wanted: Generation, - ) -> Result<(), Error> { - self.rx - .clone() - .wait_for(|status| { - let Some(observed) = status.latest_gen else { - return false; - }; - return observed >= wanted; - }) - .await - .map_err(|_| Error::NotRunning)?; - Ok(()) - } -} - -impl StorageMonitor { - pub fn new( - log: &Logger, - mount_config: MountConfig, - storage_manager: StorageHandle, - ) -> (StorageMonitor, StorageMonitorHandle) { - let dump_setup = DumpSetup::new(&log, Arc::new(mount_config)); - let log = log.new(o!("component" => "StorageMonitor")); - let (tx, rx) = watch::channel(StorageMonitorStatus::new()); - ( - StorageMonitor { log, storage_manager, dump_setup, tx }, - StorageMonitorHandle { rx }, - ) - } - - /// Run the main receive loop of the `StorageMonitor` - /// - /// This should be spawned into a tokio task - pub async fn run(mut self) { - loop { - tokio::select! 
{ - disks = self.storage_manager.wait_for_changes() => { - info!( - self.log, - "Received storage manager update"; - "disks" => ?disks - ); - self.handle_resource_update(disks).await; - } - } - } - } - - async fn handle_resource_update(&mut self, updated_disks: AllDisks) { - let generation = updated_disks.generation(); - self.dump_setup - .update_dumpdev_setup( - updated_disks.iter_managed().map(|(_id, disk)| disk), - ) - .await; - self.tx.send_replace(StorageMonitorStatus { - latest_gen: Some(*generation), - }); - } -} diff --git a/sled-agent/src/support_bundle/logs.rs b/sled-agent/src/support_bundle/logs.rs index 268f2b64835..93da8c91f66 100644 --- a/sled-agent/src/support_bundle/logs.rs +++ b/sled-agent/src/support_bundle/logs.rs @@ -7,7 +7,7 @@ use camino_tempfile::tempfile_in; use dropshot::HttpError; use range_requests::make_get_response; -use sled_storage::manager::StorageHandle; +use sled_agent_config_reconciler::InternalDisksReceiver; use slog::Logger; use slog_error_chain::InlineErrorChain; use tokio::io::AsyncSeekExt; @@ -43,12 +43,15 @@ impl From for HttpError { pub struct SupportBundleLogs<'a> { log: &'a Logger, - sled_storage: &'a StorageHandle, + internal_disks_rx: &'a InternalDisksReceiver, } impl<'a> SupportBundleLogs<'a> { - pub fn new(log: &'a Logger, sled_storage: &'a StorageHandle) -> Self { - Self { log, sled_storage } + pub fn new( + log: &'a Logger, + internal_disks_rx: &'a InternalDisksReceiver, + ) -> Self { + Self { log, internal_disks_rx } } /// Get a list of zones on a sled containing logs that we want to include in @@ -77,12 +80,9 @@ impl<'a> SupportBundleLogs<'a> { { // We are using an M.2 device for temporary storage to assemble a zip // file made up of all of the discovered zone's logs. - let m2_debug_datasets = self - .sled_storage - .get_latest_disks() - .await - .all_sled_diagnostics_directories(); - let tempdir = m2_debug_datasets.first().ok_or(Error::MissingStorage)?; + let current_internal_disks = self.internal_disks_rx.current(); + let mut m2_debug_datasets = current_internal_disks.all_debug_datasets(); + let tempdir = m2_debug_datasets.next().ok_or(Error::MissingStorage)?; let mut tempfile = tempfile_in(tempdir)?; let log = self.log.clone(); diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 9101e090725..dc65f95e718 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -14,6 +14,7 @@ use futures::Stream; use futures::StreamExt; use illumos_utils::zfs::DatasetProperties; use omicron_common::api::external::Error as ExternalError; +use omicron_common::api::external::Generation; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetName; @@ -28,15 +29,16 @@ use range_requests::PotentialRange; use range_requests::SingleRange; use sha2::{Digest, Sha256}; use sled_agent_api::*; +use sled_agent_config_reconciler::ConfigReconcilerHandle; +use sled_agent_config_reconciler::DatasetTaskError; use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::borrow::Cow; +use std::collections::BTreeMap; use std::io::Write; use tokio::io::AsyncReadExt; use tokio::io::AsyncSeekExt; @@ 
-86,6 +88,9 @@ pub enum Error { #[error(transparent)] Zip(#[from] ZipError), + + #[error(transparent)] + DatasetTask(#[from] DatasetTaskError), } fn err_str(err: &dyn std::error::Error) -> String { @@ -144,7 +149,6 @@ pub trait LocalStorage: Sync { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result; /// Returns all nested datasets within an existing dataset @@ -165,19 +169,29 @@ pub trait LocalStorage: Sync { &self, name: NestedDatasetLocation, ) -> Result<(), Error>; - - /// Returns the root filesystem path where datasets are mounted. - /// - /// This is typically "/" in prod, but can be a temporary directory - /// for tests to isolate storage that typically appears globally. - fn zpool_mountpoint_root(&self) -> Cow; } /// This implementation is effectively a pass-through to the real methods #[async_trait] -impl LocalStorage for StorageHandle { +impl LocalStorage for ConfigReconcilerHandle { async fn dyn_datasets_config_list(&self) -> Result { - self.datasets_config_list().await.map_err(|err| err.into()) + // TODO-cleanup This is super gross; add a better API (maybe fetch a + // single dataset by ID, since that's what our caller wants?) + Ok(self + .inventory() + .ledgered_sled_config + .map(|sled_config| DatasetsConfig { + generation: sled_config.generation, + datasets: sled_config + .datasets + .into_iter() + .map(|d| (d.id, d)) + .collect(), + }) + .unwrap_or_else(|| DatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::new(), + })) } fn dyn_dataset_get( @@ -204,12 +218,10 @@ impl LocalStorage for StorageHandle { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result { - dataset - .ensure_mounted_and_get_mountpoint(mount_root) + self.nested_dataset_ensure_mounted(dataset) .await - .map_err(Error::from) + .map_err(|err| err.into()) } async fn dyn_nested_dataset_list( @@ -233,10 +245,6 @@ impl LocalStorage for StorageHandle { ) -> Result<(), Error> { self.nested_dataset_destroy(name).await.map_err(|err| err.into()) } - - fn zpool_mountpoint_root(&self) -> Cow { - Cow::Borrowed(self.mount_config().root.as_path()) - } } /// This implementation allows storage bundles to be stored on simulated storage @@ -256,10 +264,10 @@ impl LocalStorage for crate::sim::Storage { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result { + let slf = self.lock(); // Simulated storage treats all datasets as mounted. - Ok(dataset.mountpoint(mount_root)) + Ok(dataset.mountpoint(slf.root())) } async fn dyn_nested_dataset_list( @@ -283,10 +291,6 @@ impl LocalStorage for crate::sim::Storage { ) -> Result<(), Error> { self.lock().nested_dataset_destroy(name).map_err(|err| err.into()) } - - fn zpool_mountpoint_root(&self) -> Cow { - Cow::Owned(self.lock().root().to_path_buf()) - } } /// Describes the type of access to the support bundle @@ -511,10 +515,7 @@ impl<'a> SupportBundleManager<'a> { // The dataset for a support bundle exists. let support_bundle_path = self .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset.name, - &self.storage.zpool_mountpoint_root(), - ) + .dyn_ensure_mounted_and_get_mountpoint(dataset.name) .await? 
.join(BUNDLE_FILE_NAME); @@ -624,13 +625,8 @@ impl<'a> SupportBundleManager<'a> { info!(log, "Dataset does exist for bundle"); // The mounted root of the support bundle dataset - let support_bundle_dir = self - .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset, - &self.storage.zpool_mountpoint_root(), - ) - .await?; + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let support_bundle_path = support_bundle_dir.join(BUNDLE_FILE_NAME); let support_bundle_path_tmp = support_bundle_dir.join(format!( "{}-{BUNDLE_TMP_FILE_NAME_SUFFIX}", @@ -736,13 +732,8 @@ impl<'a> SupportBundleManager<'a> { NestedDatasetLocation { path: support_bundle_id.to_string(), root }; // The mounted root of the support bundle dataset - let support_bundle_dir = self - .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset, - &self.storage.zpool_mountpoint_root(), - ) - .await?; + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let path = support_bundle_dir.join(BUNDLE_FILE_NAME); let f = tokio::fs::File::open(&path).await?; @@ -943,11 +934,80 @@ mod tests { use omicron_common::disk::DatasetsConfig; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; + use sled_storage::manager::StorageHandle; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::collections::BTreeMap; use zip::ZipWriter; use zip::write::SimpleFileOptions; + // TODO-cleanup Should we rework these tests to not use StorageHandle (real + // code now goes through `ConfigReconcilerHandle`)? + #[async_trait] + impl LocalStorage for StorageHandle { + async fn dyn_datasets_config_list( + &self, + ) -> Result { + self.datasets_config_list().await.map_err(|err| err.into()) + } + + fn dyn_dataset_get( + &self, + dataset_name: &String, + ) -> Result { + let Some(dataset) = + illumos_utils::zfs::Zfs::get_dataset_properties( + &[dataset_name.clone()], + illumos_utils::zfs::WhichDatasets::SelfOnly, + ) + .map_err(|err| Error::DatasetLookup(err))? + .pop() + else { + // This should not be possible, unless the "zfs get" command is + // behaving unpredictably. We're only asking for a single dataset, + // so on success, we should see the result of that dataset. + return Err(Error::DatasetLookup(anyhow::anyhow!( + "Zfs::get_dataset_properties returned an empty vec?" 
+ ))); + }; + + Ok(dataset) + } + + async fn dyn_ensure_mounted_and_get_mountpoint( + &self, + dataset: NestedDatasetLocation, + ) -> Result { + dataset + .ensure_mounted_and_get_mountpoint(&self.mount_config().root) + .await + .map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, Error> { + self.nested_dataset_list(name, options) + .await + .map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), Error> { + self.nested_dataset_ensure(config).await.map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), Error> { + self.nested_dataset_destroy(name).await.map_err(|err| err.into()) + } + } + struct SingleU2StorageHarness { storage_test_harness: StorageManagerTestHarness, zpool_id: ZpoolUuid, diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 982aaf5d719..e06eb80f7d6 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -27,9 +27,9 @@ use illumos_utils::zfs::Snapshot; use illumos_utils::zfs::ZFS; use illumos_utils::zfs::Zfs; use illumos_utils::zone::AdmError; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::zone_bundle::*; -use sled_storage::dataset::U2_DEBUG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -136,7 +136,8 @@ pub struct ZoneBundler { // State shared between tasks, e.g., used when creating a bundle in different // tasks or between a creation and cleanup. struct Inner { - storage_handle: StorageHandle, + internal_disks_rx: InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, cleanup_context: CleanupContext, last_cleanup_at: Instant, } @@ -164,11 +165,11 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. async fn bundle_directories(&self) -> Vec { - let resources = self.storage_handle.get_latest_disks().await; // NOTE: These bundle directories are always stored on M.2s, so we don't // need to worry about synchronizing with U.2 disk expungement at the // callsite. - let expected = resources.all_zone_bundle_directories(); + let internal_disks = self.internal_disks_rx.current(); + let expected = internal_disks.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -233,7 +234,8 @@ impl ZoneBundler { /// to clean them up to free up space. pub fn new( log: Logger, - storage_handle: StorageHandle, + internal_disks_rx: InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, cleanup_context: CleanupContext, ) -> Self { // This is compiled out in tests because there's no way to set our @@ -251,7 +253,8 @@ impl ZoneBundler { .expect("Failed to initialize existing ZFS resources"); let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { - storage_handle, + internal_disks_rx, + available_datasets_rx, cleanup_context, last_cleanup_at: Instant::now(), })); @@ -352,9 +355,9 @@ impl ZoneBundler { // prior bundles have completed. 
let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let resources = inner.storage_handle.get_latest_disks().await; - let extra_log_dirs = resources - .all_u2_mountpoints(U2_DEBUG_DATASET) + let extra_log_dirs = inner + .available_datasets_rx + .all_mounted_debug_datasets() .into_iter() .map(|pool_path| pool_path.path) .collect(); @@ -1758,7 +1761,10 @@ mod illumos_tests { use chrono::TimeZone; use chrono::Timelike; use chrono::Utc; + use omicron_common::disk::DiskIdentity; use rand::RngCore; + use sled_agent_config_reconciler::AvailableDatasetsReceiver; + use sled_agent_config_reconciler::InternalDisksReceiver; use sled_storage::manager_test_harness::StorageManagerTestHarness; use slog::Drain; use slog::Logger; @@ -1883,9 +1889,38 @@ mod illumos_tests { let log = test_logger(); let context = CleanupContext::default(); let resource_wrapper = ResourceWrapper::new(&log).await; + let handle = resource_wrapper.storage_test_harness.handle(); + let all_disks = handle.get_latest_disks().await; + + // Convert from StorageManagerTestHarness to config-reconciler channels. + // Do we want to expand config-reconciler test support and not use + // StorageManagerTestHarness? + let internal_disks_rx = InternalDisksReceiver::fake_static( + Arc::new(all_disks.mount_config().clone()), + all_disks.all_m2_zpools().into_iter().enumerate().map( + |(i, zpool)| { + ( + DiskIdentity { + vendor: format!("test-vendor-{i}"), + model: format!("test-model-{i}"), + serial: format!("test-serial-{i}"), + }, + zpool, + ) + }, + ), + ); + let available_datasets_rx = AvailableDatasetsReceiver::fake_static( + all_disks + .all_m2_zpools() + .into_iter() + .zip(all_disks.all_m2_mountpoints(".")), + ); + let bundler = ZoneBundler::new( log, - resource_wrapper.storage_test_harness.handle().clone(), + internal_disks_rx, + available_datasets_rx, context, ); Ok(CleanupTestContext {