From 5dd2bcb2df214f88ee9dda399beefd12ca822918 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 21 Jun 2023 12:16:31 -0700 Subject: [PATCH 01/12] Unify services and datasets, external and internal DNS are datasets too --- Cargo.lock | 1 + common/src/sql/dbinit.sql | 7 +- illumos-utils/Cargo.toml | 1 + illumos-utils/src/zpool.rs | 90 ++------- nexus/db-model/src/dataset_kind.rs | 8 + nexus/db-model/src/service_kind.rs | 12 ++ nexus/types/src/internal_api/params.rs | 29 ++- openapi/nexus-internal.json | 74 ++++++-- openapi/sled-agent.json | 101 +++++------ sled-agent/src/http_entrypoints.rs | 29 +-- sled-agent/src/params.rs | 84 +++------ sled-agent/src/rack_setup/plan/service.rs | 148 +++++++++------ sled-agent/src/rack_setup/service.rs | 211 ++++++++++------------ sled-agent/src/services.rs | 113 +----------- sled-agent/src/sled_agent.rs | 68 +++---- sled-agent/src/storage_manager.rs | 39 ++-- smf/external-dns/config.toml | 2 +- smf/internal-dns/config.toml | 2 +- 18 files changed, 425 insertions(+), 594 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1965ee06425..e7ac41acc71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3174,6 +3174,7 @@ dependencies = [ "oxide-vpc", "schemars", "serde", + "serde_json", "slog", "smf", "thiserror", diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index fa7ec0b1c9f..e4ded8d82b8 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -186,6 +186,9 @@ CREATE INDEX ON omicron.public.switch ( */ CREATE TYPE omicron.public.service_kind AS ENUM ( + 'crucible', + 'cockroach', + 'clickhouse', 'crucible_pantry', 'dendrite', 'external_dns', @@ -385,7 +388,9 @@ CREATE TABLE omicron.public.Zpool ( CREATE TYPE omicron.public.dataset_kind AS ENUM ( 'crucible', 'cockroach', - 'clickhouse' + 'clickhouse', + 'external_dns', + 'internal_dns' ); /* diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 2ad76d579f3..79b7d20e4b4 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -34,6 +34,7 @@ opte-ioctl.workspace = true [dev-dependencies] mockall.workspace = true +serde_json.workspace = true toml.workspace = true [features] diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 2560e68eba9..38f20e7dcd6 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -7,7 +7,7 @@ use crate::{execute, ExecutionError, PFEXEC}; use camino::{Utf8Path, Utf8PathBuf}; use schemars::JsonSchema; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Serialize}; use std::fmt; use std::str::FromStr; use uuid::Uuid; @@ -271,7 +271,9 @@ impl Zpool { } } -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, JsonSchema)] +#[derive( + Copy, Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, +)] #[serde(rename_all = "snake_case")] pub enum ZpoolKind { // This zpool is used for external storage (u.2) @@ -284,7 +286,9 @@ pub enum ZpoolKind { /// /// This expects that the format will be: `ox{i,p}_` - we parse the prefix /// when reading the structure, and validate that the UUID can be utilized. -#[derive(Clone, Debug, Hash, PartialEq, Eq, JsonSchema)] +#[derive( + Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, +)] pub struct ZpoolName { id: Uuid, kind: ZpoolKind, @@ -323,25 +327,6 @@ impl ZpoolName { } } -impl<'de> Deserialize<'de> for ZpoolName { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - ZpoolName::from_str(&s).map_err(serde::de::Error::custom) - } -} - -impl Serialize for ZpoolName { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&self.to_string()) - } -} - impl FromStr for ZpoolName { type Err = String; @@ -374,60 +359,15 @@ impl fmt::Display for ZpoolName { mod test { use super::*; - fn toml_string(s: &str) -> String { - format!("zpool_name = \"{}\"", s) - } - - fn parse_name(s: &str) -> Result { - toml_string(s) - .parse::() - .expect("Cannot parse as TOML value") - .get("zpool_name") - .expect("Missing key") - .clone() - .try_into::() - } - - #[test] - fn test_parse_external_zpool_name() { - let uuid: Uuid = - "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); - let good_name = format!("{}{}", ZPOOL_EXTERNAL_PREFIX, uuid); - - let name = parse_name(&good_name).expect("Cannot parse as ZpoolName"); - assert_eq!(uuid, name.id()); - assert_eq!(ZpoolKind::External, name.kind()); - } - #[test] - fn test_parse_internal_zpool_name() { - let uuid: Uuid = - "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); - let good_name = format!("{}{}", ZPOOL_INTERNAL_PREFIX, uuid); - - let name = parse_name(&good_name).expect("Cannot parse as ZpoolName"); - assert_eq!(uuid, name.id()); - assert_eq!(ZpoolKind::Internal, name.kind()); - } - - #[test] - fn test_parse_bad_zpool_names() { - let bad_names = vec![ - // Nonsense string - "this string is GARBAGE", - // Missing prefix - "d462a7f7-b628-40fe-80ff-4e4189e2d62b", - // Underscores - "oxp_d462a7f7_b628_40fe_80ff_4e4189e2d62b", - ]; - - for bad_name in &bad_names { - assert!( - parse_name(&bad_name).is_err(), - "Parsing {} should fail", - bad_name - ); - } + fn test_parse_zpool_name_json() { + let _zpool_name: ZpoolName = serde_json::from_str( + r#"{ + "id": "d462a7f7-b628-40fe-80ff-4e4189e2d62b", + "kind": "external" + }"#, + ) + .expect("Could not parse ZpoolName from Json Object"); } #[test] diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs index e2c0510ab3d..f4c6a5eee66 100644 --- a/nexus/db-model/src/dataset_kind.rs +++ b/nexus/db-model/src/dataset_kind.rs @@ -19,6 +19,8 @@ impl_enum_type!( Crucible => b"crucible" Cockroach => b"cockroach" Clickhouse => b"clickhouse" + ExternalDns => b"external_dns" + InternalDns => b"internal_dns" ); impl From for DatasetKind { @@ -33,6 +35,12 @@ impl From for DatasetKind { internal_api::params::DatasetKind::Clickhouse => { DatasetKind::Clickhouse } + internal_api::params::DatasetKind::ExternalDns => { + DatasetKind::ExternalDns + } + internal_api::params::DatasetKind::InternalDns => { + DatasetKind::InternalDns + } } } } diff --git a/nexus/db-model/src/service_kind.rs b/nexus/db-model/src/service_kind.rs index 3be06c12057..afb29abaa79 100644 --- a/nexus/db-model/src/service_kind.rs +++ b/nexus/db-model/src/service_kind.rs @@ -17,6 +17,9 @@ impl_enum_type!( pub enum ServiceKind; // Enum values + Clickhouse => b"clickhouse" + Cockroach => b"cockroach" + Crucible => b"crucible" CruciblePantry => b"crucible_pantry" Dendrite => b"dendrite" ExternalDns => b"external_dns" @@ -48,6 +51,15 @@ impl From for ServiceKind { impl From for ServiceKind { fn from(k: internal_api::params::ServiceKind) -> Self { match k { + internal_api::params::ServiceKind::Clickhouse => { + ServiceKind::Clickhouse + } + internal_api::params::ServiceKind::Cockroach => { + ServiceKind::Cockroach + } + internal_api::params::ServiceKind::Crucible => { + ServiceKind::Crucible + } internal_api::params::ServiceKind::ExternalDns { .. } => { ServiceKind::ExternalDns } diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index dd45e16a1c5..9b3a5afecbd 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -17,7 +17,6 @@ use std::fmt; use std::net::IpAddr; use std::net::SocketAddr; use std::net::SocketAddrV6; -use std::str::FromStr; use uuid::Uuid; /// Describes the role of the sled within the rack. @@ -134,6 +133,8 @@ pub enum DatasetKind { Crucible, Cockroach, Clickhouse, + ExternalDns, + InternalDns, } impl fmt::Display for DatasetKind { @@ -143,27 +144,13 @@ impl fmt::Display for DatasetKind { Crucible => "crucible", Cockroach => "cockroach", Clickhouse => "clickhouse", + ExternalDns => "external_dns", + InternalDns => "internal_dns", }; write!(f, "{}", s) } } -impl FromStr for DatasetKind { - type Err = omicron_common::api::external::Error; - - fn from_str(s: &str) -> Result { - use DatasetKind::*; - match s { - "crucible" => Ok(Crucible), - "cockroach" => Ok(Cockroach), - "clickhouse" => Ok(Clickhouse), - _ => Err(Self::Err::InternalError { - internal_message: format!("Unknown dataset kind: {}", s), - }), - } - } -} - /// Describes a dataset within a pool. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct DatasetPutRequest { @@ -188,13 +175,16 @@ pub struct ServiceNic { #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone, PartialEq, Eq)] #[serde(rename_all = "snake_case", tag = "type", content = "content")] pub enum ServiceKind { + Clickhouse, + Cockroach, + Crucible, + CruciblePantry, ExternalDns { external_address: IpAddr, nic: ServiceNic }, InternalDns, Nexus { external_address: IpAddr, nic: ServiceNic }, Oximeter, Dendrite, Tfport, - CruciblePantry, BoundaryNtp { snat: SourceNatConfig, nic: ServiceNic }, InternalNtp, } @@ -203,6 +193,9 @@ impl fmt::Display for ServiceKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use ServiceKind::*; let s = match self { + Clickhouse => "clickhouse", + Cockroach => "cockroach", + Crucible => "crucible", ExternalDns { .. } => "external_dns", InternalDns => "internal_dns", Nexus { .. } => "nexus", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 656c876bf97..a8f414cf30f 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -931,7 +931,9 @@ "enum": [ "crucible", "cockroach", - "clickhouse" + "clickhouse", + "external_dns", + "internal_dns" ] }, "DatasetPutRequest": { @@ -2774,6 +2776,62 @@ "ServiceKind": { "description": "Describes the purpose of the service.", "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "cockroach" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + }, + "required": [ + "type" + ] + }, { "type": "object", "properties": { @@ -2892,20 +2950,6 @@ "type" ] }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "crucible_pantry" - ] - } - }, - "required": [ - "type" - ] - }, { "type": "object", "properties": { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 8f45de2e6c5..de323de0475 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -54,32 +54,6 @@ } } }, - "/filesystem": { - "put": { - "operationId": "filesystem_put", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DatasetEnsureBody" - } - } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/instances/{instance_id}": { "put": { "operationId": "instance_register", @@ -607,32 +581,6 @@ "target" ] }, - "DatasetEnsureBody": { - "description": "Used to request a new dataset kind exists within a zpool.\n\nMany dataset types are associated with services that will be instantiated when the dataset is detected.", - "type": "object", - "properties": { - "address": { - "type": "string" - }, - "dataset_kind": { - "$ref": "#/components/schemas/DatasetKind" - }, - "id": { - "type": "string", - "format": "uuid" - }, - "zpool_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "address", - "dataset_kind", - "id", - "zpool_id" - ] - }, "DatasetKind": { "description": "The type of a dataset, and an auxiliary information necessary to successfully launch a zone managing the associated data.", "oneOf": [ @@ -677,6 +625,34 @@ "required": [ "type" ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + }, + "required": [ + "type" + ] } ] }, @@ -695,6 +671,23 @@ "pool_name" ] }, + "DatasetRequest": { + "description": "Describes a request to provision a specific dataset", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "name": { + "$ref": "#/components/schemas/DatasetName" + } + }, + "required": [ + "id", + "name" + ] + }, "DiskEnsureBody": { "description": "Sent from to a sled agent to establish the runtime state of a Disk", "type": "object", @@ -1736,7 +1729,7 @@ "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, "ServiceEnsureBody": { - "description": "Used to request that the Sled initialize multiple services.\n\nThis may be used to record that certain sleds are responsible for launching services which may not be associated with a dataset, such as Nexus.", + "description": "Used to request that the Sled initialize multiple services.", "type": "object", "properties": { "services": { @@ -2018,7 +2011,7 @@ "default": null, "allOf": [ { - "$ref": "#/components/schemas/DatasetName" + "$ref": "#/components/schemas/DatasetRequest" } ] }, diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 6d81d1b2d49..ac83db25812 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -5,10 +5,9 @@ //! HTTP entrypoint functions for the sled agent's exposed API use crate::params::{ - DatasetEnsureBody, DiskEnsureBody, InstanceEnsureBody, - InstancePutMigrationIdsBody, InstancePutStateBody, - InstancePutStateResponse, InstanceUnregisterResponse, ServiceEnsureBody, - SledRole, TimeSync, VpcFirewallRulesEnsureBody, Zpool, + DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, + InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, + ServiceEnsureBody, SledRole, TimeSync, VpcFirewallRulesEnsureBody, Zpool, }; use dropshot::{ endpoint, ApiDescription, HttpError, HttpResponseOk, @@ -31,7 +30,6 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(disk_put)?; - api.register(filesystem_put)?; api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; @@ -109,27 +107,6 @@ async fn sled_role_get( Ok(HttpResponseOk(sa.get_role().await)) } -#[endpoint { - method = PUT, - path = "/filesystem", -}] -async fn filesystem_put( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.filesystem_ensure( - body_args.id, - body_args.zpool_id, - body_args.dataset_kind, - body_args.address, - ) - .await - .map_err(|e| Error::from(e))?; - Ok(HttpResponseUpdatedNoContent()) -} - /// Path parameters for Instance requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct InstancePathParam { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index cfffd1fe823..787a7329bba 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -216,30 +216,8 @@ pub enum DatasetKind { CockroachDb, Crucible, Clickhouse, -} - -impl DatasetKind { - /// Returns the type of the zone which manages this dataset. - pub fn zone_type(&self) -> ZoneType { - match *self { - DatasetKind::CockroachDb => ZoneType::CockroachDb, - DatasetKind::Crucible => ZoneType::Crucible, - DatasetKind::Clickhouse => ZoneType::Clickhouse, - } - } - - /// Returns the service type which runs in the zone managing this dataset. - /// - /// NOTE: This interface is only viable because datasets run a single - /// service in their zone. If that precondition is no longer true, this - /// interface should be re-visited. - pub fn service_type(&self) -> ServiceType { - match *self { - DatasetKind::CockroachDb => ServiceType::CockroachDb, - DatasetKind::Crucible => ServiceType::Crucible, - DatasetKind::Clickhouse => ServiceType::Clickhouse, - } - } + ExternalDns, + InternalDns, } impl From for sled_agent_client::types::DatasetKind { @@ -249,6 +227,8 @@ impl From for sled_agent_client::types::DatasetKind { CockroachDb => Self::CockroachDb, Crucible => Self::Crucible, Clickhouse => Self::Clickhouse, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, } } } @@ -257,9 +237,11 @@ impl From for nexus_client::types::DatasetKind { fn from(k: DatasetKind) -> Self { use DatasetKind::*; match k { - CockroachDb { .. } => Self::Cockroach, + CockroachDb => Self::Cockroach, Crucible => Self::Crucible, Clickhouse => Self::Clickhouse, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, } } } @@ -271,38 +253,13 @@ impl std::fmt::Display for DatasetKind { Crucible => "crucible", CockroachDb { .. } => "cockroachdb", Clickhouse => "clickhouse", + ExternalDns { .. } => "external_dns", + InternalDns { .. } => "internal_dns", }; write!(f, "{}", s) } } -/// Used to request a new dataset kind exists within a zpool. -/// -/// Many dataset types are associated with services that will be -/// instantiated when the dataset is detected. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct DatasetEnsureBody { - // The UUID of the dataset, as well as the service using it directly. - pub id: Uuid, - // The name (and UUID) of the Zpool which we are inserting into. - pub zpool_id: Uuid, - // The type of the filesystem. - pub dataset_kind: DatasetKind, - // The address on which the zone will listen for requests. - pub address: SocketAddrV6, -} - -impl From for sled_agent_client::types::DatasetEnsureBody { - fn from(p: DatasetEnsureBody) -> Self { - Self { - zpool_id: p.zpool_id, - dataset_kind: p.dataset_kind.into(), - address: p.address.to_string(), - id: p.id, - } - } -} - /// Describes service-specific parameters. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -538,6 +495,21 @@ impl std::fmt::Display for ZoneType { } } +/// Describes a request to provision a specific dataset +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct DatasetRequest { + pub id: Uuid, + pub name: crate::storage::dataset::DatasetName, +} + +impl From for sled_agent_client::types::DatasetRequest { + fn from(d: DatasetRequest) -> Self { + Self { id: d.id, name: d.name.into() } + } +} + /// Describes a request to create a zone running one or more services. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -551,7 +523,7 @@ pub struct ServiceZoneRequest { pub addresses: Vec, // Datasets which should be managed by this service. #[serde(default)] - pub dataset: Option, + pub dataset: Option, // The addresses in the global zone which should be created, if necessary // to route to the service. // @@ -576,7 +548,7 @@ impl ServiceZoneRequest { // The name of a unique identifier for the zone, if one is necessary. pub fn zone_name_unique_identifier(&self) -> Option { - self.dataset.as_ref().map(|d| d.pool().to_string()) + self.dataset.as_ref().map(|d| d.name.pool().to_string()) } } @@ -623,10 +595,6 @@ impl TryFrom } /// Used to request that the Sled initialize multiple services. -/// -/// This may be used to record that certain sleds are responsible for -/// launching services which may not be associated with a dataset, such -/// as Nexus. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct ServiceEnsureBody { pub services: Vec, diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 7323d9f2d8f..6bbe29f179c 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -7,13 +7,15 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::ledger::{Ledger, Ledgerable}; use crate::params::{ - DatasetEnsureBody, ServiceType, ServiceZoneRequest, ServiceZoneService, + DatasetRequest, ServiceType, ServiceZoneRequest, ServiceZoneService, ZoneType, }; use crate::rack_setup::config::SetupServiceConfig as Config; +use crate::storage::dataset::DatasetName; use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; +use illumos_utils::zpool::ZpoolName; use internal_dns::{ServiceName, DNS_ZONE}; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, @@ -91,10 +93,6 @@ pub enum PlanError { #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] pub struct SledRequest { - /// Datasets to be created. - #[serde(default, rename = "dataset")] - pub datasets: Vec, - /// Services to be instantiated. #[serde(default, rename = "service")] pub services: Vec, @@ -165,7 +163,7 @@ impl Plan { async fn get_u2_zpools_from_sled( log: &Logger, address: SocketAddrV6, - ) -> Result, PlanError> { + ) -> Result, PlanError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) @@ -179,7 +177,7 @@ impl Plan { ); let get_u2_zpools = || async { - let zpools: Vec = client + let zpools: Vec = client .zpools_get() .await .map(|response| { @@ -187,7 +185,9 @@ impl Plan { .into_inner() .into_iter() .filter_map(|zpool| match zpool.disk_type { - SledAgentTypes::DiskType::U2 => Some(zpool.id), + SledAgentTypes::DiskType::U2 => { + Some(ZpoolName::new_external(zpool.id)) + } SledAgentTypes::DiskType::M2 => None, }) .collect() @@ -284,7 +284,8 @@ impl Plan { if idx < EXTERNAL_DNS_COUNT { let internal_ip = addr_alloc.next().expect("Not enough addrs"); let http_port = omicron_common::address::DNS_HTTP_PORT; - let dns_port = omicron_common::address::DNS_PORT; + let http_address = + SocketAddrV6::new(internal_ip, http_port, 0, 0); let id = Uuid::new_v4(); let zone = dns_builder.host_zone(id, internal_ip).unwrap(); dns_builder @@ -296,26 +297,27 @@ impl Plan { .unwrap(); let (nic, external_ip) = svc_port_builder.next_dns(id, &mut services_ip_pool)?; + let dns_port = omicron_common::address::DNS_PORT; + let dns_address = SocketAddr::new(external_ip, dns_port); + let dataset_kind = crate::params::DatasetKind::ExternalDns; + let dataset_name = + DatasetName::new(u2_zpools[0].clone(), dataset_kind); + request.services.push(ServiceZoneRequest { id, zone_type: ZoneType::ExternalDns, - addresses: vec![internal_ip], - dataset: None, + addresses: vec![*http_address.ip()], + dataset: Some(DatasetRequest { id, name: dataset_name }), gz_addresses: vec![], services: vec![ServiceZoneService { id, details: ServiceType::ExternalDns { - http_address: SocketAddrV6::new( - internal_ip, - http_port, - 0, - 0, - ), - dns_address: SocketAddr::new(external_ip, dns_port), + http_address, + dns_address, nic, }, }], - }) + }); } // The first enumerated sleds get assigned the responsibility @@ -388,64 +390,91 @@ impl Plan { // zpools described from the underlying config file. if idx < CRDB_COUNT { let id = Uuid::new_v4(); - let address = addr_alloc.next().expect("Not enough addrs"); + let ip = addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::COCKROACH_PORT; - let zone = dns_builder.host_zone(id, address).unwrap(); + let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder .service_backend_zone(ServiceName::Cockroach, &zone, port) .unwrap(); - let address = SocketAddrV6::new(address, port, 0, 0); - request.datasets.push(DatasetEnsureBody { + request.services.push(ServiceZoneRequest { id, - zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::CockroachDb, - address, + zone_type: ZoneType::CockroachDb, + addresses: vec![ip], + dataset: Some(DatasetRequest { + id, + name: DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::CockroachDb, + ), + }), + gz_addresses: vec![], + services: vec![ServiceZoneService { + id, + details: ServiceType::CockroachDb, + }], }); } // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove if idx < CLICKHOUSE_COUNT { let id = Uuid::new_v4(); - let address = addr_alloc.next().expect("Not enough addrs"); + let ip = addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CLICKHOUSE_PORT; - let zone = dns_builder.host_zone(id, address).unwrap(); + let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder .service_backend_zone(ServiceName::Clickhouse, &zone, port) .unwrap(); - let address = SocketAddrV6::new(address, port, 0, 0); - request.datasets.push(DatasetEnsureBody { + request.services.push(ServiceZoneRequest { id, - zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::Clickhouse, - address, + zone_type: ZoneType::Clickhouse, + addresses: vec![ip], + dataset: Some(DatasetRequest { + id, + name: DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::Clickhouse, + ), + }), + gz_addresses: vec![], + services: vec![ServiceZoneService { + id, + details: ServiceType::Clickhouse, + }], }); } // Each zpool gets a crucible zone. // // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove - for zpool_id in u2_zpools { - let address = SocketAddrV6::new( - addr_alloc.next().expect("Not enough addrs"), - omicron_common::address::CRUCIBLE_PORT, - 0, - 0, - ); + for pool in &u2_zpools { + let ip = addr_alloc.next().expect("Not enough addrs"); + let port = omicron_common::address::CRUCIBLE_PORT; let id = Uuid::new_v4(); - let zone = dns_builder.host_zone(id, *address.ip()).unwrap(); + let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder .service_backend_zone( ServiceName::Crucible(id), &zone, - address.port(), + port, ) .unwrap(); - request.datasets.push(DatasetEnsureBody { + request.services.push(ServiceZoneRequest { id, - zpool_id, - dataset_kind: crate::params::DatasetKind::Crucible, - address, + zone_type: ZoneType::Crucible, + addresses: vec![ip], + dataset: Some(DatasetRequest { + id, + name: DatasetName::new( + pool.clone(), + crate::params::DatasetKind::Crucible, + ), + }), + gz_addresses: vec![], + services: vec![ServiceZoneService { + id, + details: ServiceType::Crucible, + }], }); } @@ -453,9 +482,12 @@ impl Plan { // responsibility of being internal DNS servers. if idx < dns_subnets.len() { let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); + let ip = dns_subnet.dns_address().ip(); + let http_address = SocketAddrV6::new(ip, DNS_HTTP_PORT, 0, 0); + let dns_address = SocketAddrV6::new(ip, DNS_PORT, 0, 0); + let id = Uuid::new_v4(); - let zone = dns_builder.host_zone(id, dns_addr).unwrap(); + let zone = dns_builder.host_zone(id, ip).unwrap(); dns_builder .service_backend_zone( ServiceName::InternalDns, @@ -463,24 +495,22 @@ impl Plan { DNS_HTTP_PORT, ) .unwrap(); + let dataset_name = DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::InternalDns, + ); + request.services.push(ServiceZoneRequest { id, zone_type: ZoneType::InternalDns, - addresses: vec![dns_addr], - dataset: None, + addresses: vec![ip], + dataset: Some(DatasetRequest { id, name: dataset_name }), gz_addresses: vec![dns_subnet.gz_address().ip()], services: vec![ServiceZoneService { id, details: ServiceType::InternalDns { - http_address: SocketAddrV6::new( - dns_addr, - DNS_HTTP_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), + http_address, + dns_address, }, }], }); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 04404e23c81..4400c810968 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -62,8 +62,8 @@ use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::ledger::{Ledger, Ledgerable}; use crate::nexus::d2n_params; use crate::params::{ - AutonomousServiceOnlyError, DatasetEnsureBody, ServiceType, - ServiceZoneRequest, TimeSync, ZoneType, + AutonomousServiceOnlyError, DatasetKind, ServiceType, ServiceZoneRequest, + ServiceZoneService, TimeSync, ZoneType, }; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, @@ -86,8 +86,9 @@ use nexus_client::{ }; use omicron_common::address::Ipv6Subnet; use omicron_common::address::{ - get_sled_address, CRUCIBLE_PANTRY_PORT, DENDRITE_PORT, NEXUS_INTERNAL_PORT, - NTP_PORT, OXIMETER_PORT, + get_sled_address, CLICKHOUSE_PORT, COCKROACH_PORT, CRUCIBLE_PANTRY_PORT, + CRUCIBLE_PORT, DENDRITE_PORT, DNS_HTTP_PORT, NEXUS_INTERNAL_PORT, NTP_PORT, + OXIMETER_PORT, }; use omicron_common::api::internal::shared::{PortFec, PortSpeed}; use omicron_common::backoff::{ @@ -281,48 +282,6 @@ impl ServiceInner { ServiceInner { log } } - async fn initialize_datasets( - &self, - sled_address: SocketAddrV6, - datasets: &Vec, - ) -> Result<(), SetupServiceError> { - let dur = std::time::Duration::from_secs(60); - - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .map_err(SetupServiceError::HttpClient)?; - let client = SledAgentClient::new_with_client( - &format!("http://{}", sled_address), - client, - self.log.new(o!("SledAgentClient" => sled_address.to_string())), - ); - - info!(self.log, "sending dataset requests..."); - for dataset in datasets { - let filesystem_put = || async { - info!(self.log, "creating new filesystem: {:?}", dataset); - client - .filesystem_put(&dataset.clone().into()) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>>(()) - }; - let log_failure = |error, _| { - warn!(self.log, "failed to create filesystem"; "error" => ?error); - }; - retry_notify( - retry_policy_internal_service_aggressive(), - filesystem_put, - log_failure, - ) - .await?; - } - - Ok(()) - } - async fn initialize_services( &self, sled_address: SocketAddrV6, @@ -384,9 +343,9 @@ impl ServiceInner { let services: Vec<_> = services_request .services .iter() - .filter_map(|svc| { - if matches!(svc.zone_type, ZoneType::InternalDns) { - Some(svc.clone()) + .filter_map(|service| { + if matches!(service.zone_type, ZoneType::InternalDns,) { + Some(service.clone()) } else { None } @@ -395,7 +354,6 @@ impl ServiceInner { if !services.is_empty() { self.initialize_services(*sled_address, &services).await?; } - Ok(()) }, )) @@ -410,39 +368,18 @@ impl ServiceInner { service_plan.services.iter().filter_map( |(_, services_request)| { // iterate services for this sled - let dns_addrs: Vec<_> = services_request + let dns_addrs: Vec = services_request .services .iter() - .filter_map(|svc| { - if !matches!(svc.zone_type, ZoneType::InternalDns) { - // This is not an internal DNS zone. - None - } else { - // This is an internal DNS zone. Find the IP - // and port that have been assigned to it. - // There should be exactly one. - let addrs = svc.services.iter().filter_map(|s| { - if let ServiceType::InternalDns { http_address, .. } = &s.details { - Some(*http_address) - } else { - None - } - }).collect::>(); - - if addrs.len() == 1 { - Some(addrs[0]) - } else { - warn!( - log, - "DNS configuration: expected one \ - InternalDns service for zone with \ - type ZoneType::InternalDns, but \ - found {} (zone {})", - addrs.len(), - svc.id, - ); - None - } + .filter_map(|service| { + match &service.services[0] { + ServiceZoneService { + details: ServiceType::InternalDns { http_address, .. }, + .. + } => { + Some(*http_address) + }, + _ => None, } }) .collect(); @@ -454,7 +391,7 @@ impl ServiceInner { } ) .flatten() - .collect::>(); + .collect::>(); let dns_config = &service_plan.dns_config; for ip_addr in dns_server_ips { @@ -783,25 +720,89 @@ impl ServiceInner { kind: NexusTypes::ServiceKind::InternalNtp, }); } - details => { + ServiceType::Clickhouse => { + services.push(NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: SocketAddrV6::new( + zone.addresses[0], + CLICKHOUSE_PORT, + 0, + 0, + ) + .to_string(), + kind: NexusTypes::ServiceKind::Clickhouse, + }); + } + ServiceType::Crucible => { + services.push(NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: SocketAddrV6::new( + zone.addresses[0], + CRUCIBLE_PORT, + 0, + 0, + ) + .to_string(), + kind: NexusTypes::ServiceKind::Crucible, + }); + } + ServiceType::CockroachDb => { + services.push(NexusTypes::ServicePutRequest { + service_id, + zone_id, + sled_id, + address: SocketAddrV6::new( + zone.addresses[0], + COCKROACH_PORT, + 0, + 0, + ) + .to_string(), + kind: NexusTypes::ServiceKind::Cockroach, + }); + } + ServiceType::ManagementGatewayService + | ServiceType::Wicketd { .. } + | ServiceType::Maghemite { .. } + | ServiceType::Tfport { .. } => { return Err(SetupServiceError::BadConfig(format!( "RSS should not request service of type: {}", - details + svc.details ))); } } } } - for dataset in service_request.datasets.iter() { - datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id: dataset.zpool_id, - dataset_id: dataset.id, - request: NexusTypes::DatasetPutRequest { - address: dataset.address.to_string(), - kind: dataset.dataset_kind.clone().into(), - }, - }) + for service in service_request.services.iter() { + if let Some(dataset) = &service.dataset { + let port = match dataset.name.dataset() { + DatasetKind::CockroachDb => COCKROACH_PORT, + DatasetKind::Clickhouse => CLICKHOUSE_PORT, + DatasetKind::Crucible => CRUCIBLE_PORT, + DatasetKind::ExternalDns => DNS_HTTP_PORT, + DatasetKind::InternalDns => DNS_HTTP_PORT, + }; + + datasets.push(NexusTypes::DatasetCreateRequest { + zpool_id: dataset.name.pool().id(), + dataset_id: dataset.id, + request: NexusTypes::DatasetPutRequest { + address: SocketAddrV6::new( + service.addresses[0], + port, + 0, + 0, + ) + .to_string(), + kind: dataset.name.dataset().clone().into(), + }, + }) + } } } let internal_services_ip_pool_ranges = config @@ -1207,31 +1208,9 @@ impl ServiceInner { // Wait until time is synchronized on all sleds before proceeding. self.wait_for_timesync(&sled_addresses).await?; - // Issue the dataset initialization requests to all sleds. - futures::future::join_all(service_plan.services.iter().map( - |(sled_address, services_request)| async move { - self.initialize_datasets( - *sled_address, - &services_request.datasets, - ) - .await?; - Ok(()) - }, - )) - .await - .into_iter() - .collect::>()?; - - info!(self.log, "Finished setting up agents and datasets"); + info!(self.log, "Finished setting up Internal DNS and NTP"); // Issue service initialization requests. - // - // NOTE: This must happen *after* the dataset initialization, - // to ensure that CockroachDB has been initialized before Nexus - // starts. - // - // If Nexus was more resilient to concurrent initialization - // of CRDB, this requirement could be relaxed. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { // With the current implementation of "initialize_services", diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index e22c6143cff..55d680dcd27 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -217,7 +217,6 @@ impl Config { // The filename of the ledger, within the provided directory. const SERVICES_LEDGER_FILENAME: &str = "services.toml"; -const STORAGE_SERVICES_LEDGER_FILENAME: &str = "storage-services.toml"; // A wrapper around `ZoneRequest`, which allows it to be serialized // to a toml file. @@ -306,8 +305,6 @@ pub struct ServiceManagerInner { sidecar_revision: SidecarRevision, // Zones representing running services zones: Mutex>, - // Zones representing services which own datasets - dataset_zones: Mutex>, underlay_vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, bootstrap_vnic_allocator: VnicAllocator, @@ -380,7 +377,6 @@ impl ServiceManager { sidecar_revision, switch_zone_maghemite_links, zones: Mutex::new(vec![]), - dataset_zones: Mutex::new(vec![]), underlay_vnic_allocator: VnicAllocator::new( "Service", underlay_etherstub, @@ -430,22 +426,7 @@ impl ServiceManager { .collect() } - async fn all_storage_service_ledgers(&self) -> Vec { - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(STORAGE_SERVICES_LEDGER_FILENAME)]; - } - - self.inner - .storage - .resources() - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await - .into_iter() - .map(|p| p.join(STORAGE_SERVICES_LEDGER_FILENAME)) - .collect() - } - - pub async fn load_non_storage_services(&self) -> Result<(), Error> { + pub async fn load_services(&self) -> Result<(), Error> { let log = &self.inner.log; let mut existing_zones = self.inner.zones.lock().await; let Some(ledger) = Ledger::::new( @@ -521,25 +502,6 @@ impl ServiceManager { Ok(()) } - pub async fn load_storage_services(&self) -> Result<(), Error> { - let log = &self.inner.log; - let mut existing_zones = self.inner.dataset_zones.lock().await; - let Some(ledger) = Ledger::::new( - log, - self.all_storage_service_ledgers().await, - ) - .await else { - return Ok(()); - }; - let services = ledger.data(); - self.initialize_services_locked( - &mut existing_zones, - &services.requests, - ) - .await?; - Ok(()) - } - /// Loads services from the services manager, and returns once all requested /// services have been started. pub async fn sled_agent_started( @@ -565,15 +527,14 @@ impl ServiceManager { .map_err(|_| "already set".to_string()) .expect("Sled Agent should only start once"); - self.load_non_storage_services().await?; // TODO(https://github.com/oxidecomputer/omicron/issues/2973): // These will fail if the disks aren't attached. // Should we have a retry loop here? Kinda like we have with the switch // / NTP zone? - // - // NOTE: We could totally do the same thing with - // "load_non_storage_services". - self.load_storage_services().await?; + self.load_services().await.map_err(|e| { + error!(self.inner.log, "failed to launch services"; "error" => e.to_string()); + e + })?; Ok(()) } @@ -922,7 +883,7 @@ impl ServiceManager { .zone .dataset .iter() - .map(|d| zone::Dataset { name: d.full() }) + .map(|d| zone::Dataset { name: d.name.full() }) .collect::>(); let devices: Vec = device_names @@ -1103,16 +1064,17 @@ impl ServiceManager { let listen_addr = &request.zone.addresses[0].to_string(); let listen_port = &CRUCIBLE_PORT.to_string(); - let dataset = request + let dataset_name = request .zone .dataset .as_ref() + .map(|d| d.name.full()) .expect("Crucible requires dataset"); let uuid = &Uuid::new_v4().to_string(); let config = PropertyGroupBuilder::new("config") .add_property("datalink", "astring", datalink) .add_property("gateway", "astring", gateway) - .add_property("dataset", "astring", &dataset.full()) + .add_property("dataset", "astring", &dataset_name) .add_property("listen_addr", "astring", listen_addr) .add_property("listen_port", "astring", listen_port) .add_property("uuid", "astring", uuid) @@ -1919,63 +1881,6 @@ impl ServiceManager { Ok(()) } - /// Ensures that a storage zone be initialized. - /// - /// These services will be instantiated by this function, and will be - /// recorded to a local file to ensure they start automatically on next - /// boot. - pub async fn ensure_storage_service( - &self, - request: ServiceZoneRequest, - ) -> Result<(), Error> { - let log = &self.inner.log; - let mut existing_zones = self.inner.dataset_zones.lock().await; - - // Read the existing set of services from the ledger. - let service_paths = self.all_storage_service_ledgers().await; - let mut ledger = - match Ledger::::new(log, service_paths.clone()) - .await - { - Some(ledger) => ledger, - None => Ledger::::new_with( - log, - service_paths.clone(), - AllZoneRequests::default(), - ), - }; - let ledger_zone_requests = ledger.data_mut(); - - if !ledger_zone_requests - .requests - .iter() - .any(|zone_request| zone_request.zone.id == request.id) - { - // If this is a new request, provision a zone filesystem on the same - // disk as the dataset. - let dataset = request - .dataset - .as_ref() - .expect("Storage services should have a dataset"); - let root = dataset - .pool() - .dataset_mountpoint(sled_hardware::disk::ZONE_DATASET); - ledger_zone_requests - .requests - .push(ZoneRequest { zone: request, root }); - } - - self.initialize_services_locked( - &mut existing_zones, - &ledger_zone_requests.requests, - ) - .await?; - - ledger.commit().await?; - - Ok(()) - } - pub fn boottime_rewrite(&self, zones: &Vec) { if self .inner diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 0e63112966e..55fcf7ae933 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -9,10 +9,10 @@ use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::{ - DatasetKind, DiskStateRequested, InstanceHardware, - InstanceMigrationSourceParams, InstancePutStateResponse, - InstanceStateRequested, InstanceUnregisterResponse, ServiceEnsureBody, - ServiceZoneService, SledRole, TimeSync, VpcFirewallRule, Zpool, + DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, + InstancePutStateResponse, InstanceStateRequested, + InstanceUnregisterResponse, ServiceEnsureBody, SledRole, TimeSync, + VpcFirewallRule, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::{self, StorageManager}; @@ -528,6 +528,25 @@ impl SledAgent { &self, requested_services: ServiceEnsureBody, ) -> Result<(), Error> { + let datasets: Vec<_> = requested_services + .services + .iter() + .filter_map(|service| service.dataset.clone()) + .collect(); + + // TODO: + // - If these are the set of filesystems, we should also consider + // removing the ones which are not listed here. + // - It's probably worth sending a bulk request to the storage system, + // rather than requesting individual datasets. + for dataset in &datasets { + // First, ensure the dataset exists + self.inner + .storage + .upsert_filesystem(dataset.id, dataset.name.clone()) + .await?; + } + self.inner.services.ensure_all_services(requested_services).await?; Ok(()) } @@ -547,47 +566,6 @@ impl SledAgent { } } - /// Ensures that a filesystem type exists within the zpool. - pub async fn filesystem_ensure( - &self, - dataset_id: Uuid, - zpool_id: Uuid, - dataset_kind: DatasetKind, - address: SocketAddrV6, - ) -> Result<(), Error> { - // First, ensure the dataset exists - let dataset = self - .inner - .storage - .upsert_filesystem(dataset_id, zpool_id, dataset_kind.clone()) - .await?; - - // NOTE: We use the "dataset_id" as the "service_id" here. - // - // Since datasets are tightly coupled with their own services - e.g., - // from the perspective of Nexus, provisioning a dataset implies the - // sled should start a service - this is ID re-use is reasonable. - // - // If Nexus ever wants sleds to provision datasets independently of - // launching services, this ID type overlap should be reconsidered. - let service_type = dataset_kind.service_type(); - let services = - vec![ServiceZoneService { id: dataset_id, details: service_type }]; - - // Next, ensure a zone exists to manage storage for that dataset - let request = crate::params::ServiceZoneRequest { - id: dataset_id, - zone_type: dataset_kind.zone_type(), - addresses: vec![*address.ip()], - dataset: Some(dataset), - gz_addresses: vec![], - services, - }; - self.inner.services.ensure_storage_service(request).await?; - - Ok(()) - } - /// Idempotently ensures that a given instance is registered with this sled, /// i.e., that it can be addressed by future calls to /// [`instance_ensure_state`]. diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index c573dbe6a2c..0c901ce0887 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,7 +5,6 @@ //! Management of sled-local storage. use crate::nexus::LazyNexusClient; -use crate::params::DatasetKind; use crate::storage::dataset::DatasetName; use camino::Utf8PathBuf; use futures::stream::FuturesOrdered; @@ -84,7 +83,7 @@ pub enum Error { }, #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] - UuidMismatch { name: DatasetName, old: Uuid, new: Uuid }, + UuidMismatch { name: Box, old: Uuid, new: Uuid }, #[error("Error parsing pool {name}'s size: {err}")] BadPoolSize { @@ -156,8 +155,7 @@ type NotifyFut = #[derive(Debug)] struct NewFilesystemRequest { dataset_id: Uuid, - zpool_id: Uuid, - dataset_kind: DatasetKind, + dataset_name: DatasetName, responder: oneshot::Sender>, } @@ -327,7 +325,7 @@ impl StorageWorker { if let Ok(id) = id_str.parse::() { if id != dataset_id { return Err(Error::UuidMismatch { - name: dataset_name.clone(), + name: Box::new(dataset_name.clone()), old: id, new: dataset_id, }); @@ -788,14 +786,18 @@ impl StorageWorker { ) -> Result { info!(self.log, "add_dataset: {:?}", request); let mut pools = resources.pools.lock().await; - let pool = pools.get_mut(&request.zpool_id).ok_or_else(|| { - Error::ZpoolNotFound(format!( - "{}, looked up while trying to add dataset", - request.zpool_id - )) - })?; - let dataset_name = - DatasetName::new(pool.name.clone(), request.dataset_kind.clone()); + let pool = pools + .get_mut(&request.dataset_name.pool().id()) + .ok_or_else(|| { + Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.dataset_name.pool(), + )) + })?; + let dataset_name = DatasetName::new( + pool.name.clone(), + request.dataset_name.dataset().clone(), + ); self.ensure_dataset(request.dataset_id, &dataset_name)?; Ok(dataset_name) } @@ -1027,16 +1029,11 @@ impl StorageManager { pub async fn upsert_filesystem( &self, dataset_id: Uuid, - zpool_id: Uuid, - dataset_kind: DatasetKind, + dataset_name: DatasetName, ) -> Result { let (tx, rx) = oneshot::channel(); - let request = NewFilesystemRequest { - dataset_id, - zpool_id, - dataset_kind, - responder: tx, - }; + let request = + NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; self.inner .tx diff --git a/smf/external-dns/config.toml b/smf/external-dns/config.toml index 4f572ddbc1b..b90224f48c2 100644 --- a/smf/external-dns/config.toml +++ b/smf/external-dns/config.toml @@ -11,5 +11,5 @@ path = "/dev/stdout" if_exists = "append" [storage] -storage_path = "/var/oxide/dns" +storage_path = "/data" keep_old_generations = 3 diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 4f572ddbc1b..b90224f48c2 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -11,5 +11,5 @@ path = "/dev/stdout" if_exists = "append" [storage] -storage_path = "/var/oxide/dns" +storage_path = "/data" keep_old_generations = 3 From 90ff57716d8c386b6faac8e558371f0979f1f324 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 21 Jun 2023 14:34:01 -0700 Subject: [PATCH 02/12] Custom JsonSchema for ZpoolName, clarify some function names --- illumos-utils/src/zpool.rs | 135 ++++++++++++++++++++++++++--- openapi/sled-agent.json | 25 +----- sled-agent/src/bootstrap/agent.rs | 10 +-- sled-agent/src/bootstrap/server.rs | 4 +- sled-agent/src/services.rs | 2 +- sled-agent/src/storage/dataset.rs | 21 +---- 6 files changed, 136 insertions(+), 61 deletions(-) diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 38f20e7dcd6..5cb0e4cfd19 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -7,7 +7,7 @@ use crate::{execute, ExecutionError, PFEXEC}; use camino::{Utf8Path, Utf8PathBuf}; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::str::FromStr; use uuid::Uuid; @@ -271,9 +271,7 @@ impl Zpool { } } -#[derive( - Copy, Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, -)] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum ZpoolKind { // This zpool is used for external storage (u.2) @@ -286,14 +284,39 @@ pub enum ZpoolKind { /// /// This expects that the format will be: `ox{i,p}_` - we parse the prefix /// when reading the structure, and validate that the UUID can be utilized. -#[derive( - Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, -)] +#[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct ZpoolName { id: Uuid, kind: ZpoolKind, } +/// Custom JsonSchema implementation to encode the constraints on Name. +impl JsonSchema for ZpoolName { + fn schema_name() -> String { + "ZpoolName".to_string() + } + fn json_schema( + _: &mut schemars::gen::SchemaGenerator, + ) -> schemars::schema::Schema { + schemars::schema::SchemaObject { + metadata: Some(Box::new(schemars::schema::Metadata { + title: Some( + "The name of a Zpool".to_string(), + ), + description: Some( + "Zpool names are of the format ox{i,p}_. They are either \ + Internal or External, and should be unique" + .to_string(), + ), + ..Default::default() + })), + instance_type: Some(schemars::schema::InstanceType::String.into()), + ..Default::default() + } + .into() + } +} + impl ZpoolName { pub fn new_internal(id: Uuid) -> Self { Self { id, kind: ZpoolKind::Internal } @@ -327,6 +350,25 @@ impl ZpoolName { } } +impl<'de> Deserialize<'de> for ZpoolName { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + ZpoolName::from_str(&s).map_err(serde::de::Error::custom) + } +} + +impl Serialize for ZpoolName { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + impl FromStr for ZpoolName { type Err = String; @@ -361,13 +403,78 @@ mod test { #[test] fn test_parse_zpool_name_json() { - let _zpool_name: ZpoolName = serde_json::from_str( - r#"{ - "id": "d462a7f7-b628-40fe-80ff-4e4189e2d62b", - "kind": "external" - }"#, - ) - .expect("Could not parse ZpoolName from Json Object"); + #[derive(Serialize, Deserialize, JsonSchema)] + struct TestDataset { + pool_name: ZpoolName, + } + + // Confirm that we can convert from a JSON string to a a ZpoolName + let json_string = + r#"{"pool_name":"oxi_d462a7f7-b628-40fe-80ff-4e4189e2d62b"}"#; + let dataset: TestDataset = serde_json::from_str(json_string) + .expect("Could not parse ZpoolName from Json Object"); + assert!(matches!(dataset.pool_name.kind, ZpoolKind::Internal)); + + // Confirm we can go the other way (ZpoolName to JSON string) too. + let j = serde_json::to_string(&dataset) + .expect("Cannot convert back to JSON string"); + assert_eq!(j, json_string); + } + + fn toml_string(s: &str) -> String { + format!("zpool_name = \"{}\"", s) + } + + fn parse_name(s: &str) -> Result { + toml_string(s) + .parse::() + .expect("Cannot parse as TOML value") + .get("zpool_name") + .expect("Missing key") + .clone() + .try_into::() + } + + #[test] + fn test_parse_external_zpool_name() { + let uuid: Uuid = + "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); + let good_name = format!("{}{}", ZPOOL_EXTERNAL_PREFIX, uuid); + + let name = parse_name(&good_name).expect("Cannot parse as ZpoolName"); + assert_eq!(uuid, name.id()); + assert_eq!(ZpoolKind::External, name.kind()); + } + + #[test] + fn test_parse_internal_zpool_name() { + let uuid: Uuid = + "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); + let good_name = format!("{}{}", ZPOOL_INTERNAL_PREFIX, uuid); + + let name = parse_name(&good_name).expect("Cannot parse as ZpoolName"); + assert_eq!(uuid, name.id()); + assert_eq!(ZpoolKind::Internal, name.kind()); + } + + #[test] + fn test_parse_bad_zpool_names() { + let bad_names = vec![ + // Nonsense string + "this string is GARBAGE", + // Missing prefix + "d462a7f7-b628-40fe-80ff-4e4189e2d62b", + // Underscores + "oxp_d462a7f7_b628_40fe_80ff_4e4189e2d62b", + ]; + + for bad_name in &bad_names { + assert!( + parse_name(&bad_name).is_err(), + "Parsing {} should fail", + bad_name + ); + } } #[test] diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index de323de0475..e9d81d20752 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2479,29 +2479,10 @@ "id" ] }, - "ZpoolKind": { - "type": "string", - "enum": [ - "external", - "internal" - ] - }, "ZpoolName": { - "description": "A wrapper around a zpool name.\n\nThis expects that the format will be: `ox{i,p}_` - we parse the prefix when reading the structure, and validate that the UUID can be utilized.", - "type": "object", - "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "kind": { - "$ref": "#/components/schemas/ZpoolKind" - } - }, - "required": [ - "id", - "kind" - ] + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "type": "string" } } } diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 4da8ae0d121..211dcc2a50c 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -425,7 +425,7 @@ impl Agent { let paths = sled_config_paths(&storage_resources).await; let maybe_ledger = Ledger::::new(&ba_log, paths).await; - let make_agent = move |initialized| Agent { + let make_bootstrap_agent = move |initialized| Agent { log: ba_log, parent_log: log, ip, @@ -443,13 +443,13 @@ impl Agent { baseboard, }; let agent = if let Some(ledger) = maybe_ledger { - let agent = make_agent(true); + let agent = make_bootstrap_agent(true); info!(agent.log, "Sled already configured, loading sled agent"); let sled_request = ledger.data(); - agent.request_agent(&sled_request.request).await?; + agent.request_sled_agent(&sled_request.request).await?; agent } else { - make_agent(false) + make_bootstrap_agent(false) }; Ok(agent) @@ -505,7 +505,7 @@ impl Agent { /// If the Sled Agent has already been initialized: /// - This method is idempotent for the same request /// - Thie method returns an error for different requests - pub async fn request_agent( + pub async fn request_sled_agent( &self, request: &StartSledAgentRequest, ) -> Result { diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index bd4f700497a..25b114997bd 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -190,10 +190,10 @@ async fn handle_start_sled_agent_request( let response = match read_request(&mut stream).await? { Request::StartSledAgentRequest(request) => { - // The call to `request_agent` should be idempotent if the request + // The call to `request_sled_agent` should be idempotent if the request // was the same. bootstrap_agent - .request_agent(&request) + .request_sled_agent(&request) .await .map(|response| Response::SledAgentResponse(response)) .map_err(|err| { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 55d680dcd27..0e4945f4b7e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -511,7 +511,7 @@ impl ServiceManager { underlay_address: Ipv6Addr, rack_id: Uuid, ) -> Result<(), Error> { - debug!(&self.inner.log, "sled agent started"; "underlay_address" => underlay_address.to_string()); + info!(&self.inner.log, "sled agent started"; "underlay_address" => underlay_address.to_string()); self.inner .sled_info .set(SledAgentInfo { diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs index d4f0d63474e..3ccc45c2985 100644 --- a/sled-agent/src/storage/dataset.rs +++ b/sled-agent/src/storage/dataset.rs @@ -3,7 +3,6 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::params::DatasetKind; -use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -38,22 +37,10 @@ impl DatasetName { impl From for sled_agent_client::types::DatasetName { fn from(n: DatasetName) -> Self { - let id = n.pool().id(); - - // NOTE: Ideally, this translation would live alongside the definitions - // of ZpoolKind and ZpoolName, but they're currently in illumos-utils, - // which has no dependency on sled_agent_client. - let kind = match n.pool().kind() { - ZpoolKind::External => { - sled_agent_client::types::ZpoolKind::External - } - ZpoolKind::Internal => { - sled_agent_client::types::ZpoolKind::Internal - } - }; - let pool_name = sled_agent_client::types::ZpoolName { id, kind }; - - Self { pool_name, kind: n.dataset().clone().into() } + Self { + pool_name: n.pool().to_string().into(), + kind: n.dataset().clone().into(), + } } } From c83d94c12f19ee2a852c3ba6ebbcd9cd4fc0ebda Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 21 Jun 2023 16:22:45 -0700 Subject: [PATCH 03/12] logging --- sled-agent/src/services.rs | 6 +++++- sled-agent/src/sled_agent.rs | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0e4945f4b7e..2c56788e8b5 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -428,12 +428,16 @@ impl ServiceManager { pub async fn load_services(&self) -> Result<(), Error> { let log = &self.inner.log; + let ledger_paths = self.all_service_ledgers().await; + info!(log, "Loading services from: {ledger_paths:?}"); + let mut existing_zones = self.inner.zones.lock().await; let Some(ledger) = Ledger::::new( log, - self.all_service_ledgers().await, + ledger_paths, ) .await else { + info!(log, "Loading services - No services detected"); return Ok(()); }; let services = ledger.data(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 55fcf7ae933..dac0f40a99e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -218,7 +218,7 @@ impl SledAgent { "component" => "SledAgent", "sled_id" => request.id.to_string(), )); - info!(&log, "created sled agent"); + info!(&log, "SledAgent::new(..) staring"); let etherstub = Dladm::ensure_etherstub( illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME, @@ -266,7 +266,7 @@ impl SledAgent { match config.vmm_reservoir_percentage { Some(sz) if sz > 0 && sz < 100 => { instances.set_reservoir_size(&hardware, sz).map_err(|e| { - warn!(log, "Failed to set VMM reservoir size: {e}"); + error!(log, "Failed to set VMM reservoir size: {e}"); e })?; } From 678f8b4c52477593d27c206722ddfb0232553e62 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 22 Jun 2023 11:23:26 -0700 Subject: [PATCH 04/12] Await CRDB before launching nexus in RSS --- sled-agent/src/rack_setup/service.rs | 68 ++++++++++++++-------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4400c810968..1619dd32a2d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -282,7 +282,7 @@ impl ServiceInner { ServiceInner { log } } - async fn initialize_services( + async fn initialize_services_on_sled( &self, sled_address: SocketAddrV6, services: &Vec, @@ -331,20 +331,25 @@ impl ServiceInner { Ok(()) } - // Configure the internal DNS servers with the initial DNS data - async fn initialize_dns( + // Ensure that all services of a particular type are running. + // + // This is useful in a rack-setup context, where initial boot ordering + // can matter for first-time-setup. + // + // Note that after first-time setup, the initialization order of + // services should not matter. + async fn ensure_all_services_of_type( &self, service_plan: &ServicePlan, + zone_types: &HashSet>, ) -> Result<(), SetupServiceError> { - let log = &self.log; - // Start up the internal DNS services futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { let services: Vec<_> = services_request .services .iter() .filter_map(|service| { - if matches!(service.zone_type, ZoneType::InternalDns,) { + if zone_types.contains(&std::mem::discriminant(&service.zone_type)) { Some(service.clone()) } else { None @@ -352,7 +357,7 @@ impl ServiceInner { }) .collect(); if !services.is_empty() { - self.initialize_services(*sled_address, &services).await?; + self.initialize_services_on_sled(*sled_address, &services).await?; } Ok(()) }, @@ -360,6 +365,15 @@ impl ServiceInner { .await .into_iter() .collect::>()?; + Ok(()) + } + + // Configure the internal DNS servers with the initial DNS data + async fn initialize_internal_dns_records( + &self, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + let log = &self.log; // Determine the list of DNS servers that are supposed to exist based on // the service plan that has just been deployed. @@ -1049,7 +1063,10 @@ impl ServiceInner { // Set up internal DNS services first and write the initial // DNS configuration to the internal DNS servers. - self.initialize_dns(&service_plan).await?; + let mut zone_types = HashSet::new(); + zone_types.insert(std::mem::discriminant(&ZoneType::InternalDns)); + self.ensure_all_services_of_type(&service_plan, &zone_types).await?; + self.initialize_internal_dns_records(&service_plan).await?; // Initialize rack network before NTP comes online, otherwise boundary // services will not be available and NTP will fail to sync @@ -1179,41 +1196,22 @@ impl ServiceInner { // Next start up the NTP services. // Note we also specify internal DNS services again because it // can ony be additive. - futures::future::join_all(service_plan.services.iter().map( - |(sled_address, services_request)| async move { - let services: Vec<_> = services_request - .services - .iter() - .filter_map(|svc| { - if matches!( - svc.zone_type, - ZoneType::InternalDns | ZoneType::Ntp - ) { - Some(svc.clone()) - } else { - None - } - }) - .collect(); - if !services.is_empty() { - self.initialize_services(*sled_address, &services).await?; - } - Ok(()) - }, - )) - .await - .into_iter() - .collect::>()?; + zone_types.insert(std::mem::discriminant(&ZoneType::Ntp)); + self.ensure_all_services_of_type(&service_plan, &zone_types).await?; // Wait until time is synchronized on all sleds before proceeding. self.wait_for_timesync(&sled_addresses).await?; info!(self.log, "Finished setting up Internal DNS and NTP"); + // Wait until Cockroach has been initialized before running Nexus. + zone_types.insert(std::mem::discriminant(&ZoneType::CockroachDb)); + self.ensure_all_services_of_type(&service_plan, &zone_types).await?; + // Issue service initialization requests. futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { - // With the current implementation of "initialize_services", + // With the current implementation of "initialize_services_on_sled", // we must provide the set of *all* services that should be // executing on a sled. // @@ -1221,7 +1219,7 @@ impl ServiceInner { // they are already running - this is fine, however, as the // receiving sled agent doesn't modify the already-running // service. - self.initialize_services( + self.initialize_services_on_sled( *sled_address, &services_request.services, ) From 97257b7ac8247d7b9f753176ae35258305c9e3a4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 22 Jun 2023 11:30:22 -0700 Subject: [PATCH 05/12] fmt --- sled-agent/src/rack_setup/service.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1619dd32a2d..2d8038ec5ff 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -349,7 +349,9 @@ impl ServiceInner { .services .iter() .filter_map(|service| { - if zone_types.contains(&std::mem::discriminant(&service.zone_type)) { + if zone_types.contains(&std::mem::discriminant( + &service.zone_type, + )) { Some(service.clone()) } else { None @@ -357,7 +359,8 @@ impl ServiceInner { }) .collect(); if !services.is_empty() { - self.initialize_services_on_sled(*sled_address, &services).await?; + self.initialize_services_on_sled(*sled_address, &services) + .await?; } Ok(()) }, From ae93a3ece49a63da7a352edbfd7d111dd086953f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 11:05:01 -0700 Subject: [PATCH 06/12] Add regex and tests --- Cargo.lock | 1 + illumos-utils/Cargo.toml | 1 + illumos-utils/src/zpool.rs | 50 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 2a7fd120ad2..5b3e49be9a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3221,6 +3221,7 @@ dependencies = [ "omicron-common 0.1.0", "opte-ioctl", "oxide-vpc", + "regress", "schemars", "serde", "serde_json", diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 79b7d20e4b4..04bdf8cad09 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -34,6 +34,7 @@ opte-ioctl.workspace = true [dev-dependencies] mockall.workspace = true +regress.workspace = true serde_json.workspace = true toml.workspace = true diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 5cb0e4cfd19..81ded2655ec 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -290,6 +290,8 @@ pub struct ZpoolName { kind: ZpoolKind, } +const ZPOOL_NAME_REGEX: &str = r"^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"; + /// Custom JsonSchema implementation to encode the constraints on Name. impl JsonSchema for ZpoolName { fn schema_name() -> String { @@ -311,6 +313,10 @@ impl JsonSchema for ZpoolName { ..Default::default() })), instance_type: Some(schemars::schema::InstanceType::String.into()), + string: Some(Box::new(schemars::schema::StringValidation { + pattern: Some(ZPOOL_NAME_REGEX.to_owned()), + ..Default::default() + })), ..Default::default() } .into() @@ -401,6 +407,50 @@ impl fmt::Display for ZpoolName { mod test { use super::*; + #[test] + fn test_zpool_name_regex() { + let valid = [ + "oxi_d462a7f7-b628-40fe-80ff-4e4189e2d62b", + "oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b", + ]; + + let invalid = [ + "", + // Whitespace + " oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b", + "oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b ", + // Case sensitivity + "oxp_D462A7F7-b628-40fe-80ff-4e4189e2d62b", + // Bad prefix + "ox_d462a7f7-b628-40fe-80ff-4e4189e2d62b", + "oxa_d462a7f7-b628-40fe-80ff-4e4189e2d62b", + "oxi-d462a7f7-b628-40fe-80ff-4e4189e2d62b", + "oxp-d462a7f7-b628-40fe-80ff-4e4189e2d62b", + // Missing Prefix + "d462a7f7-b628-40fe-80ff-4e4189e2d62b", + // Bad UUIDs (Not following UUIDv4 format) + "oxi_d462a7f7-b628-30fe-80ff-4e4189e2d62b", + "oxi_d462a7f7-b628-40fe-c0ff-4e4189e2d62b", + ]; + + let r = regress::Regex::new(ZPOOL_NAME_REGEX) + .expect("validation regex is valid"); + for input in valid { + let m = r + .find(input) + .unwrap_or_else(|| panic!("input {input} did not match regex")); + assert_eq!(m.start(), 0, "input {input} did not match start"); + assert_eq!(m.end(), input.len(), "input {input} did not match end"); + } + + for input in invalid { + assert!( + r.find(input).is_none(), + "invalid input {input} should not match validation regex" + ); + } + } + #[test] fn test_parse_zpool_name_json() { #[derive(Serialize, Deserialize, JsonSchema)] From 0d4e9dca6adf2b1b3927b3d9c78863c78b527174 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 11:06:59 -0700 Subject: [PATCH 07/12] Alphabetize --- common/src/sql/dbinit.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 445caf987cd..99871f54c59 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -186,9 +186,9 @@ CREATE INDEX ON omicron.public.switch ( */ CREATE TYPE omicron.public.service_kind AS ENUM ( - 'crucible', - 'cockroach', 'clickhouse', + 'cockroach', + 'crucible', 'crucible_pantry', 'dendrite', 'external_dns', From 02e52b4741c5a499690b29c77b77e7614a9536a8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 11:07:50 -0700 Subject: [PATCH 08/12] /data/dns --- smf/external-dns/config.toml | 2 +- smf/internal-dns/config.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/smf/external-dns/config.toml b/smf/external-dns/config.toml index b90224f48c2..b090d3391e8 100644 --- a/smf/external-dns/config.toml +++ b/smf/external-dns/config.toml @@ -11,5 +11,5 @@ path = "/dev/stdout" if_exists = "append" [storage] -storage_path = "/data" +storage_path = "/data/dns" keep_old_generations = 3 diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index b90224f48c2..b090d3391e8 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -11,5 +11,5 @@ path = "/dev/stdout" if_exists = "append" [storage] -storage_path = "/data" +storage_path = "/data/dns" keep_old_generations = 3 From 94ac853f2095be4679a9bce4c5aa5db658d69fd0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 11:13:58 -0700 Subject: [PATCH 09/12] staring --- sled-agent/src/sled_agent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 2bf48741aad..f0fa2a6b6d4 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -228,7 +228,7 @@ impl SledAgent { "component" => "SledAgent", "sled_id" => request.id.to_string(), )); - info!(&log, "SledAgent::new(..) staring"); + info!(&log, "SledAgent::new(..) starting"); let etherstub = Dladm::ensure_etherstub( illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME, From 96f3afd407ca98bc56a28297f04a0001bb41b1a9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 15:26:24 -0700 Subject: [PATCH 10/12] indiscriminate --- sled-agent/src/rack_setup/service.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 38413232bb0..e2f78a42b01 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -340,7 +340,7 @@ impl ServiceInner { async fn ensure_all_services_of_type( &self, service_plan: &ServicePlan, - zone_types: &HashSet>, + zone_types: &HashSet, ) -> Result<(), SetupServiceError> { futures::future::join_all(service_plan.services.iter().map( |(sled_address, services_request)| async move { @@ -348,9 +348,7 @@ impl ServiceInner { .services .iter() .filter_map(|service| { - if zone_types.contains(&std::mem::discriminant( - &service.zone_type, - )) { + if zone_types.contains(&service.zone_type) { Some(service.clone()) } else { None @@ -1067,7 +1065,7 @@ impl ServiceInner { // Set up internal DNS services first and write the initial // DNS configuration to the internal DNS servers. let mut zone_types = HashSet::new(); - zone_types.insert(std::mem::discriminant(&ZoneType::InternalDns)); + zone_types.insert(ZoneType::InternalDns); self.ensure_all_services_of_type(&service_plan, &zone_types).await?; self.initialize_internal_dns_records(&service_plan).await?; @@ -1196,7 +1194,7 @@ impl ServiceInner { // Next start up the NTP services. // Note we also specify internal DNS services again because it // can ony be additive. - zone_types.insert(std::mem::discriminant(&ZoneType::Ntp)); + zone_types.insert(ZoneType::Ntp); self.ensure_all_services_of_type(&service_plan, &zone_types).await?; // Wait until time is synchronized on all sleds before proceeding. @@ -1205,7 +1203,7 @@ impl ServiceInner { info!(self.log, "Finished setting up Internal DNS and NTP"); // Wait until Cockroach has been initialized before running Nexus. - zone_types.insert(std::mem::discriminant(&ZoneType::CockroachDb)); + zone_types.insert(ZoneType::CockroachDb); self.ensure_all_services_of_type(&service_plan, &zone_types).await?; // Issue service initialization requests. From efe7c553da7d43f0535dc383b730db083c68efe6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 15:35:21 -0700 Subject: [PATCH 11/12] expectorate --- openapi/sled-agent.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index f6dd780adea..a15840e0094 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2667,7 +2667,8 @@ "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", - "type": "string" + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" } } } From aed156a1a8b0f071d654baad6363a17088243f98 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Jun 2023 15:48:12 -0700 Subject: [PATCH 12/12] Type conversions --- sled-agent/src/storage/dataset.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs index 3ccc45c2985..4efc0f320af 100644 --- a/sled-agent/src/storage/dataset.rs +++ b/sled-agent/src/storage/dataset.rs @@ -6,6 +6,7 @@ use crate::params::DatasetKind; use illumos_utils::zpool::ZpoolName; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::str::FromStr; #[derive( Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, @@ -38,7 +39,10 @@ impl DatasetName { impl From for sled_agent_client::types::DatasetName { fn from(n: DatasetName) -> Self { Self { - pool_name: n.pool().to_string().into(), + pool_name: sled_agent_client::types::ZpoolName::from_str( + &n.pool().to_string(), + ) + .unwrap(), kind: n.dataset().clone().into(), } }