From 17cda03354c5bbc69c14b62f23a7d433aa182f4d Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 6 Jun 2025 11:59:16 -0700 Subject: [PATCH 01/14] first cut --- nexus/db-model/src/deployment.rs | 50 +++- nexus/db-model/src/lib.rs | 1 + .../db-queries/src/db/datastore/deployment.rs | 271 +++++++++++++++++- nexus/db-schema/src/schema.rs | 13 + schema/crdb/dbinit.sql | 21 ++ 5 files changed, 347 insertions(+), 9 deletions(-) diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index b427ba6cc20..be8c47e140f 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -5,12 +5,13 @@ //! Types for representing the deployed software and configuration in the //! database -use crate::inventory::ZoneType; +use crate::inventory::{SpMgsSlot, SpType, ZoneType}; use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::typed_uuid::DbTypedUuid; use crate::{ - ArtifactHash, ByteCount, DbOximeterReadMode, Generation, MacAddr, Name, - SledState, SqlU8, SqlU16, SqlU32, TufArtifact, impl_enum_type, ipv6, + ArtifactHash, ByteCount, DbArtifactVersion, DbOximeterReadMode, Generation, + MacAddr, Name, SledState, SqlU8, SqlU16, SqlU32, TufArtifact, + impl_enum_type, ipv6, }; use anyhow::{Context, Result, anyhow, bail}; use chrono::{DateTime, Utc}; @@ -21,10 +22,10 @@ use nexus_db_schema::schema::{ bp_clickhouse_keeper_zone_id_to_node_id, bp_clickhouse_server_zone_id_to_node_id, bp_omicron_dataset, bp_omicron_physical_disk, bp_omicron_zone, bp_omicron_zone_nic, - bp_oximeter_read_policy, bp_sled_metadata, bp_target, + bp_oximeter_read_policy, bp_pending_mgs_update_sp, bp_sled_metadata, + bp_target, }; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; -use nexus_types::deployment::BlueprintDatasetDisposition; use nexus_types::deployment::BlueprintPhysicalDiskConfig; use nexus_types::deployment::BlueprintPhysicalDiskDisposition; use nexus_types::deployment::BlueprintTarget; @@ -33,14 +34,18 @@ use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::ClickhouseClusterConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; +use nexus_types::deployment::PendingMgsUpdate; +use nexus_types::deployment::PendingMgsUpdateDetails; use nexus_types::deployment::{ BlueprintDatasetConfig, BlueprintZoneImageVersion, OximeterReadMode, }; +use nexus_types::deployment::{BlueprintDatasetDisposition, ExpectedVersion}; use nexus_types::deployment::{BlueprintZoneImageSource, blueprint_zone_type}; use nexus_types::deployment::{ OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, OmicronZoneExternalSnatIp, }; +use nexus_types::inventory::BaseboardId; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::disk::DiskIdentity; use omicron_common::zpool_name::ZpoolName; @@ -50,6 +55,7 @@ use omicron_uuid_kinds::{ PhysicalDiskKind, SledKind, SledUuid, ZpoolKind, ZpoolUuid, }; use std::net::{IpAddr, SocketAddrV6}; +use std::sync::Arc; use uuid::Uuid; /// See [`nexus_types::deployment::Blueprint`]. @@ -1239,3 +1245,37 @@ impl BpOximeterReadPolicy { } } } + +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_pending_mgs_update_sp)] +pub struct BpPendingMgsUpdateSp { + pub blueprint_id: DbTypedUuid, + pub hw_baseboard_id: Uuid, + pub sp_type: SpType, + pub sp_slot: SpMgsSlot, + pub artifact_sha256: ArtifactHash, + pub artifact_version: DbArtifactVersion, + pub expected_active_version: DbArtifactVersion, + pub expected_inactive_version: Option, +} + +impl BpPendingMgsUpdateSp { + pub fn into_generic(self, baseboard_id: Arc) -> PendingMgsUpdate { + PendingMgsUpdate { + baseboard_id, + sp_type: self.sp_type.into(), + slot_id: u32::from(**self.sp_slot), + artifact_hash: self.artifact_sha256.into(), + artifact_version: (*self.artifact_version).clone(), + details: PendingMgsUpdateDetails::Sp { + expected_active_version: (*self.expected_active_version) + .clone(), + expected_inactive_version: match self.expected_inactive_version + { + Some(v) => ExpectedVersion::Version((*v).clone()), + None => ExpectedVersion::NoValidVersion, + }, + }, + } + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index e0a0910f2aa..31d1161355c 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -233,6 +233,7 @@ pub use switch_interface::*; pub use switch_port::*; pub use target_release::*; pub use tuf_repo::*; +pub use typed_uuid::DbTypedUuid; pub use typed_uuid::to_db_typed_uuid; pub use upstairs_repair::*; pub use user_builtin::*; diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 1df74355b8f..ae4fe5a53fb 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -26,6 +26,7 @@ use diesel::NullableExpressionMethods; use diesel::OptionalExtension; use diesel::QueryDsl; use diesel::RunQueryDsl; +use diesel::Table; use diesel::expression::SelectableHelper; use diesel::pg::Pg; use diesel::query_builder::AstPass; @@ -34,6 +35,7 @@ use diesel::query_builder::QueryId; use diesel::result::DatabaseErrorKind; use diesel::result::Error as DieselError; use diesel::sql_types; +use diesel::sql_types::Nullable; use futures::FutureExt; use id_map::IdMap; use nexus_db_errors::ErrorHandler; @@ -41,6 +43,7 @@ use nexus_db_errors::OptionalError; use nexus_db_errors::TransactionError; use nexus_db_errors::public_error_from_diesel; use nexus_db_lookup::DbConnection; +use nexus_db_model::ArtifactHash; use nexus_db_model::Blueprint as DbBlueprint; use nexus_db_model::BpClickhouseClusterConfig; use nexus_db_model::BpClickhouseKeeperZoneIdToNodeId; @@ -50,18 +53,29 @@ use nexus_db_model::BpOmicronPhysicalDisk; use nexus_db_model::BpOmicronZone; use nexus_db_model::BpOmicronZoneNic; use nexus_db_model::BpOximeterReadPolicy; +use nexus_db_model::BpPendingMgsUpdateSp; use nexus_db_model::BpSledMetadata; use nexus_db_model::BpTarget; +use nexus_db_model::DbArtifactVersion; +use nexus_db_model::DbTypedUuid; +use nexus_db_model::HwBaseboardId; +use nexus_db_model::SpMgsSlot; +use nexus_db_model::SpType; +use nexus_db_model::SqlU16; use nexus_db_model::TufArtifact; use nexus_db_model::to_db_typed_uuid; +use nexus_db_schema::enums::SpTypeEnum; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintSledConfig; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::ClickhouseClusterConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; +use nexus_types::deployment::ExpectedVersion; use nexus_types::deployment::OximeterReadMode; +use nexus_types::deployment::PendingMgsUpdateDetails; use nexus_types::deployment::PendingMgsUpdates; +use nexus_types::inventory::BaseboardId; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; @@ -74,6 +88,9 @@ use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::sync::Arc; +use thiserror::Error; use tufaceous_artifact::KnownArtifactKind; use uuid::Uuid; @@ -299,6 +316,22 @@ impl DataStore { &blueprint.oximeter_read_mode, ); + #[derive(Debug, Error)] + enum TxnError { + #[error("database error")] + Diesel(#[from] DieselError), + + #[error( + "aborting transaction after unexpectedly inserting {count} rows \ + for baseboard {baseboard_id:?} into {table_name}" + )] + BadInsertCount { + table_name: &'static str, + baseboard_id: Arc, + count: usize, + }, + } + // This implementation inserts all records associated with the // blueprint in one transaction. This is required: we don't want // any planner or executor to see a half-inserted blueprint, nor do we @@ -410,11 +443,132 @@ impl DataStore { .await?; } + // Insert pending MGS updates for service processors for this + // blueprint. These include foreign keys into the hw_baseboard_id + // table that we don't have handy. To achieve this, we use the same + // pattern used during inventory insertion: + // INSERT INTO bp_pending_mgs_update_sp + // SELECT + // id + // [other column values as literals] + // FROM hw_baseboard_id + // WHERE part_number = ... AND serial_number = ...; + // + // This way, we don't need to know the id. The database looks it up + // for us as it does the INSERT. + for update in &blueprint.pending_mgs_updates + { + // Right now, we only implement support for storing SP updates. + let PendingMgsUpdateDetails::Sp { + expected_active_version, + expected_inactive_version + } = &update.details else { + continue; + }; + + use nexus_db_schema::schema::hw_baseboard_id::dsl as baseboard_dsl; + use nexus_db_schema::schema::bp_pending_mgs_update_sp::dsl as update_dsl; + let selection = nexus_db_schema::schema::hw_baseboard_id::table + .select(( + DbTypedUuid::from(blueprint_id).into_sql::(), + baseboard_dsl::id, + SpType::from(update.sp_type).into_sql::(), + // XXX-dap size of int + SpMgsSlot::from(SqlU16::from(u16::try_from(update.slot_id).unwrap())).into_sql::(), + ArtifactHash::from(update.artifact_hash).into_sql::(), + DbArtifactVersion::from(update.artifact_version.clone()).into_sql::(), + DbArtifactVersion::from(expected_active_version.clone()).into_sql::(), + match expected_inactive_version { + ExpectedVersion::NoValidVersion => None, + ExpectedVersion::Version(v) => Some(DbArtifactVersion::from(v.clone())), + } + .into_sql::>(), + )) + .filter( + baseboard_dsl::part_number + .eq(update.baseboard_id.part_number.clone()), + ) + .filter( + baseboard_dsl::serial_number + .eq(update.baseboard_id.serial_number.clone()), + ); + let count = + diesel::insert_into(update_dsl::bp_pending_mgs_update_sp) + .values(selection) + .into_columns(( + update_dsl::blueprint_id, + update_dsl::hw_baseboard_id, + update_dsl::sp_type, + update_dsl::sp_slot, + update_dsl::artifact_sha256, + update_dsl::artifact_version, + update_dsl::expected_active_version, + update_dsl::expected_inactive_version, + )) + .execute_async(&conn) + .await?; + if count != 1 { + // This should be impossible in practice. We will insert + // however many rows matched the `baseboard_id` parts of + // the query above. It can't be more than one 1 because + // we've filtered on a pair of columns that are unique + // together. It could only be 0 if the baseboard id had + // never been seen before in an inventory collection. But + // in that case, how did we manage to construct a blueprint + // with it? + // + // This could happen in the test suite or with + // `reconfigurator-cli`, which both let you create any + // blueprint you like. In the test suite, the test just has + // to deal with this behavior (e.g., by inserting an + // inventory collection containing this SP). With + // `reconfigurator-cli`, this amounts to user error. + error!(&opctx.log, + "blueprint insertion: unexpectedly tried to insert wrong number of + rows into bp_pending_mgs_update_sp (aborting transaction)"; + "count" => count, + &update.baseboard_id, + ); + return Err(TxnError::BadInsertCount { + table_name: "bp_pending_mgs_update_sp", + count, + baseboard_id: update.baseboard_id.clone(), + }); + } + + // This statement is just here to force a compilation error + // if the set of columns in `bp_pending_mgs_update_sp` + // changes because that will affect the correctness of the + // above statement. + // + // If you're here because of a compile error, you might be + // changing the `bp_pending_mgs_update_sp` table. Update + // the statement below and be sure to update the code above, + // too! + let ( + _blueprint_id, + _hw_baseboard_id, + _sp_type, + _sp_slot, + _artifact_sha256, + _artifact_version, + _expected_active_version, + _expected_inactive_version, + ) = update_dsl::bp_pending_mgs_update_sp::all_columns(); + } + Ok(()) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| match e { + TxnError::Diesel(e) => public_error_from_diesel(e, ErrorHandler::Server), + e @ TxnError::BadInsertCount { .. } => { + // This variant is always an internal error and has no causes so + // we don't need to use InlineErrorChain here. + Error::internal_error(&e.to_string()) + } + })?; info!( &opctx.log, @@ -940,11 +1094,96 @@ impl DataStore { } }; + // Load all pending SP updates. + // + // Pagination is a little silly here because we will only allow one at a + // time in practice for a while, but it's easy enough to do. + let mut pending_updates_sp = Vec::new(); + { + use nexus_db_schema::schema::bp_pending_mgs_update_sp::dsl; + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_pending_mgs_update_sp, + dsl::hw_baseboard_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(to_db_typed_uuid(blueprint_id))) + .select(BpPendingMgsUpdateSp::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|d| d.hw_baseboard_id); + for row in batch { + pending_updates_sp.push(row); + } + } + } + + // Collect the unique baseboard ids referenced by pending updates. + let baseboard_id_ids: BTreeSet<_> = + pending_updates_sp.iter().map(|s| s.hw_baseboard_id).collect(); + // XXX-dap could commonize with inventory + // Fetch the corresponding baseboard records. + let baseboards_by_id: BTreeMap<_, _> = { + use nexus_db_schema::schema::hw_baseboard_id::dsl; + + let mut bbs = BTreeMap::new(); + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::hw_baseboard_id, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::id.eq_any(baseboard_id_ids.clone())) + .select(HwBaseboardId::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + bbs.extend( + batch + .into_iter() + .map(|bb| (bb.id, Arc::new(BaseboardId::from(bb)))), + ); + } + + bbs + }; + + // Combine this information to assemble the set of pending MGS updates. + let mut pending_mgs_updates = PendingMgsUpdates::new(); + for row in pending_updates_sp { + let Some(baseboard) = baseboards_by_id.get(&row.hw_baseboard_id) + else { + // This should be impossible. + return Err(Error::internal_error(&format!( + "loading blueprint {}: missing baseboard that we should have fetched: {}", + blueprint_id, row.hw_baseboard_id + ))); + }; + + let update = row.into_generic(baseboard.clone()); + if let Some(previous) = pending_mgs_updates.insert(update) { + // This should be impossible. + return Err(Error::internal_error(&format!( + "blueprint {}: found multiple pending updates for baseboard {:?}", + blueprint_id, previous.baseboard_id + ))); + } + } + Ok(Blueprint { id: blueprint_id, - // TODO these need to be serialized to the database. - // See oxidecomputer/omicron#7981. - pending_mgs_updates: PendingMgsUpdates::new(), + pending_mgs_updates, sleds: sled_configs, parent_blueprint_id, internal_dns_version, @@ -1870,6 +2109,7 @@ mod tests { use nexus_types::deployment::BlueprintZoneImageVersion; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::OmicronZoneExternalFloatingIp; + use nexus_types::deployment::PendingMgsUpdate; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::SledDetails; @@ -2314,6 +2554,21 @@ mod tests { .unwrap(); } + // Configure an SP update. + let (baseboard_id, sp) = + collection.sps.iter().next().expect("at least one SP"); + builder.pending_mgs_update_insert(PendingMgsUpdate { + baseboard_id: baseboard_id.clone(), + sp_type: sp.sp_type, + slot_id: u32::from(sp.sp_slot), + details: PendingMgsUpdateDetails::Sp { + expected_active_version: "1.0.0".parse().unwrap(), + expected_inactive_version: ExpectedVersion::NoValidVersion, + }, + artifact_hash: ArtifactHash([72; 32]), + artifact_version: "2.0.0".parse().unwrap(), + }); + let num_new_ntp_zones = 1; let num_new_crucible_zones = new_sled_zpools.len(); let num_new_sled_zones = num_new_ntp_zones + num_new_crucible_zones; @@ -2346,6 +2601,14 @@ mod tests { assert_all_zones_in_service(&blueprint2); assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + // This blueprint contains a PendingMgsUpdate that references an SP from + // `collection`. This must already be present in the database for + // blueprint insertion to work. + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("failed to insert inventory collection"); + // Check that we can write it to the DB and read it back. datastore .blueprint_insert(&opctx, &blueprint2) diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 42f0cf191c3..1ef5f69c11e 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1960,6 +1960,19 @@ table! { } } +table! { + bp_pending_mgs_update_sp (blueprint_id, hw_baseboard_id) { + blueprint_id -> Uuid, + hw_baseboard_id -> Uuid, + sp_type -> crate::enums::SpTypeEnum, + sp_slot -> Int4, + artifact_sha256 -> Text, + artifact_version -> Text, + expected_active_version -> Text, + expected_inactive_version -> Nullable, + } +} + table! { bootstore_keys (key, generation) { key -> Text, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 6e2dd8502f8..3829a3b9cc7 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -4412,6 +4412,27 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_oximeter_read_policy ( oximeter_read_mode omicron.public.oximeter_read_mode NOT NULL ); +-- Blueprint information related to pending SP upgrades. +CREATE TABLE IF NOT EXISTS omicron.public.bp_pending_mgs_update_sp ( + -- Foreign key into the `blueprint` table + blueprint_id UUID, + -- identify of the device to be updated + -- (foreign key into the `hw_baseboard_id` table) + hw_baseboard_id UUID NOT NULL, + -- location of this device according to MGS + sp_type omicron.public.sp_type NOT NULL, + sp_slot INT4 NOT NULL, + -- artifact to be deployed to this device + artifact_sha256 STRING(64) NOT NULL, + artifact_version STRING(64) NOT NULL, + + -- SP-specific details + expected_active_version STRING NOT NULL, + expected_inactive_version STRING, -- NULL means invalid (no version expected) + + PRIMARY KEY(blueprint_id, hw_baseboard_id) +); + -- Mapping of Omicron zone ID to CockroachDB node ID. This isn't directly used -- by the blueprint tables above, but is used by the more general Reconfigurator -- system along with them (e.g., to decommission expunged CRDB nodes). From 86f4605e7780a4cef444f2ddf449a5b6a0ef8639 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 6 Jun 2025 14:25:15 -0700 Subject: [PATCH 02/14] rustfmt --- nexus/db-model/src/deployment.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index be8c47e140f..28443427ada 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -1260,7 +1260,10 @@ pub struct BpPendingMgsUpdateSp { } impl BpPendingMgsUpdateSp { - pub fn into_generic(self, baseboard_id: Arc) -> PendingMgsUpdate { + pub fn into_generic( + self, + baseboard_id: Arc, + ) -> PendingMgsUpdate { PendingMgsUpdate { baseboard_id, sp_type: self.sp_type.into(), From f538adfefba0b6616bd526c55e398a8f2b70455d Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 18 Jun 2025 14:38:52 -0700 Subject: [PATCH 03/14] clean up blueprint insert style --- .../db-queries/src/db/datastore/deployment.rs | 451 +++++++++++------- 1 file changed, 267 insertions(+), 184 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 9123d652571..a5a252beca0 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -353,222 +353,305 @@ impl DataStore { #[allow(clippy::disallowed_methods)] self.transaction_non_retry_wrapper("blueprint_insert") .transaction(&conn, |conn| async move { - // Insert the row for the blueprint. - { - use nexus_db_schema::schema::blueprint::dsl; - let _: usize = diesel::insert_into(dsl::blueprint) - .values(row_blueprint) - .execute_async(&conn) - .await?; - } - - // Insert all the sled states for this blueprint. - { - use nexus_db_schema::schema::bp_sled_metadata::dsl as sled_metadata; + // Insert the row for the blueprint. + { + use nexus_db_schema::schema::blueprint::dsl; + let _: usize = diesel::insert_into(dsl::blueprint) + .values(row_blueprint) + .execute_async(&conn) + .await?; + } - let _ = diesel::insert_into(sled_metadata::bp_sled_metadata) - .values(sled_metadatas) - .execute_async(&conn) - .await?; - } + // Insert all the sled states for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_sled_metadata::dsl + as sled_metadata; + let _ = + diesel::insert_into(sled_metadata::bp_sled_metadata) + .values(sled_metadatas) + .execute_async(&conn) + .await?; + } - // Insert all physical disks for this blueprint. - { - use nexus_db_schema::schema::bp_omicron_physical_disk::dsl as omicron_disk; - let _ = diesel::insert_into(omicron_disk::bp_omicron_physical_disk) + // Insert all physical disks for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_physical_disk::dsl + as omicron_disk; + let _ = diesel::insert_into( + omicron_disk::bp_omicron_physical_disk, + ) .values(omicron_physical_disks) .execute_async(&conn) .await?; - } + } - // Insert all datasets for this blueprint. - { - use nexus_db_schema::schema::bp_omicron_dataset::dsl as omicron_dataset; - let _ = diesel::insert_into(omicron_dataset::bp_omicron_dataset) + // Insert all datasets for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_dataset::dsl + as omicron_dataset; + let _ = diesel::insert_into( + omicron_dataset::bp_omicron_dataset, + ) .values(omicron_datasets) .execute_async(&conn) .await?; - } - - // Insert all the Omicron zones for this blueprint. - { - use nexus_db_schema::schema::bp_omicron_zone::dsl as omicron_zone; - let _ = diesel::insert_into(omicron_zone::bp_omicron_zone) - .values(omicron_zones) - .execute_async(&conn) - .await?; - } + } - { - use nexus_db_schema::schema::bp_omicron_zone_nic::dsl as omicron_zone_nic; - let _ = - diesel::insert_into(omicron_zone_nic::bp_omicron_zone_nic) - .values(omicron_zone_nics) + // Insert all the Omicron zones for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_zone::dsl + as omicron_zone; + let _ = diesel::insert_into(omicron_zone::bp_omicron_zone) + .values(omicron_zones) .execute_async(&conn) .await?; - } - - // Insert all clickhouse cluster related tables if necessary - if let Some((clickhouse_cluster_config, keepers, servers)) = clickhouse_tables { - { - use nexus_db_schema::schema::bp_clickhouse_cluster_config::dsl; - let _ = diesel::insert_into(dsl::bp_clickhouse_cluster_config) - .values(clickhouse_cluster_config) - .execute_async(&conn) - .await?; } + { - use nexus_db_schema::schema::bp_clickhouse_keeper_zone_id_to_node_id::dsl; - let _ = diesel::insert_into(dsl::bp_clickhouse_keeper_zone_id_to_node_id) - .values(keepers) + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_zone_nic::dsl as + omicron_zone_nic; + let _ = diesel::insert_into( + omicron_zone_nic::bp_omicron_zone_nic, + ) + .values(omicron_zone_nics) .execute_async(&conn) .await?; } + + // Insert all clickhouse cluster related tables if necessary. + if let Some((clickhouse_cluster_config, keepers, servers)) = + clickhouse_tables { - use nexus_db_schema::schema::bp_clickhouse_server_zone_id_to_node_id::dsl; - let _ = diesel::insert_into(dsl::bp_clickhouse_server_zone_id_to_node_id) - .values(servers) - .execute_async(&conn) - .await?; + { + // Skip formatting this line to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_clickhouse_cluster_config::dsl; + let _ = diesel::insert_into( + dsl::bp_clickhouse_cluster_config, + ) + .values(clickhouse_cluster_config) + .execute_async(&conn) + .await?; + } + { + // Skip formatting this line to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_clickhouse_keeper_zone_id_to_node_id::dsl; + let _ = diesel::insert_into( + dsl::bp_clickhouse_keeper_zone_id_to_node_id, + ) + .values(keepers) + .execute_async(&conn) + .await?; + } + { + // Skip formatting this line to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_clickhouse_server_zone_id_to_node_id::dsl; + let _ = diesel::insert_into( + dsl::bp_clickhouse_server_zone_id_to_node_id, + ) + .values(servers) + .execute_async(&conn) + .await?; + } } - } - // Insert oximeter read policy for this blueprint - { - use nexus_db_schema::schema::bp_oximeter_read_policy::dsl; - let _ = - diesel::insert_into(dsl::bp_oximeter_read_policy) + // Insert oximeter read policy for this blueprint + { + use nexus_db_schema::schema::bp_oximeter_read_policy::dsl; + let _ = diesel::insert_into(dsl::bp_oximeter_read_policy) .values(oximeter_read_policy) .execute_async(&conn) .await?; - } + } - // Insert pending MGS updates for service processors for this - // blueprint. These include foreign keys into the hw_baseboard_id - // table that we don't have handy. To achieve this, we use the same - // pattern used during inventory insertion: - // INSERT INTO bp_pending_mgs_update_sp - // SELECT - // id - // [other column values as literals] - // FROM hw_baseboard_id - // WHERE part_number = ... AND serial_number = ...; - // - // This way, we don't need to know the id. The database looks it up - // for us as it does the INSERT. - for update in &blueprint.pending_mgs_updates - { - // Right now, we only implement support for storing SP updates. - let PendingMgsUpdateDetails::Sp { - expected_active_version, - expected_inactive_version - } = &update.details else { - continue; - }; + // Insert pending MGS updates for service processors for this + // blueprint. These include foreign keys into the hw_baseboard_id + // table that we don't have handy. To achieve this, we use the same + // pattern used during inventory insertion: + // INSERT INTO bp_pending_mgs_update_sp + // SELECT + // id + // [other column values as literals] + // FROM hw_baseboard_id + // WHERE part_number = ... AND serial_number = ...; + // + // This way, we don't need to know the id. The database looks it up + // for us as it does the INSERT. + + for update in &blueprint.pending_mgs_updates { + // Right now, we only implement support for storing SP + // updates. + let PendingMgsUpdateDetails::Sp { + expected_active_version, + expected_inactive_version, + } = &update.details + else { + continue; + }; - use nexus_db_schema::schema::hw_baseboard_id::dsl as baseboard_dsl; - use nexus_db_schema::schema::bp_pending_mgs_update_sp::dsl as update_dsl; - let selection = nexus_db_schema::schema::hw_baseboard_id::table - .select(( - DbTypedUuid::from(blueprint_id).into_sql::(), - baseboard_dsl::id, - SpType::from(update.sp_type).into_sql::(), - // XXX-dap size of int - SpMgsSlot::from(SqlU16::from(u16::try_from(update.slot_id).unwrap())).into_sql::(), - ArtifactHash::from(update.artifact_hash).into_sql::(), - DbArtifactVersion::from(update.artifact_version.clone()).into_sql::(), - DbArtifactVersion::from(expected_active_version.clone()).into_sql::(), + // slot_ids fit into a u16 in practice. This will be enforced + // at compile time shortly. See oxidecomputer/omicron#8378. + let update_slot_id = u16::try_from(update.slot_id) + .expect("slot id to fit into u16"); + + let db_blueprint_id = DbTypedUuid::from(blueprint_id) + .into_sql::( + ); + let db_sp_type = + SpType::from(update.sp_type).into_sql::(); + let db_slot_id = SpMgsSlot::from(SqlU16::from( + u16::try_from(update_slot_id).unwrap(), + )) + .into_sql::(); + let db_artifact_hash = + ArtifactHash::from(update.artifact_hash) + .into_sql::(); + let db_artifact_version = DbArtifactVersion::from( + update.artifact_version.clone(), + ) + .into_sql::(); + let db_expected_version = DbArtifactVersion::from( + expected_active_version.clone(), + ) + .into_sql::(); + let db_expected_inactive_version = match expected_inactive_version { ExpectedVersion::NoValidVersion => None, - ExpectedVersion::Version(v) => Some(DbArtifactVersion::from(v.clone())), + ExpectedVersion::Version(v) => { + Some(DbArtifactVersion::from(v.clone())) + } } - .into_sql::>(), - )) - .filter( - baseboard_dsl::part_number - .eq(update.baseboard_id.part_number.clone()), + .into_sql::>(); + + // Skip formatting several lines to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema::hw_baseboard_id::dsl + as baseboard_dsl; + #[rustfmt::skip] + use nexus_db_schema::schema::bp_pending_mgs_update_sp::dsl + as update_dsl; + let selection = + nexus_db_schema::schema::hw_baseboard_id::table + .select(( + db_blueprint_id, + baseboard_dsl::id, + db_slot_id, + db_artifact_hash, + db_artifact_version, + db_expected_version, + db_expected_inactive_version, + )) + .filter( + baseboard_dsl::part_number.eq(update + .baseboard_id + .part_number + .clone()), + ) + .filter( + baseboard_dsl::serial_number.eq(update + .baseboard_id + .serial_number + .clone()), + ); + let count = diesel::insert_into( + update_dsl::bp_pending_mgs_update_sp, ) - .filter( - baseboard_dsl::serial_number - .eq(update.baseboard_id.serial_number.clone()), - ); - let count = - diesel::insert_into(update_dsl::bp_pending_mgs_update_sp) - .values(selection) - .into_columns(( - update_dsl::blueprint_id, - update_dsl::hw_baseboard_id, - update_dsl::sp_type, - update_dsl::sp_slot, - update_dsl::artifact_sha256, - update_dsl::artifact_version, - update_dsl::expected_active_version, - update_dsl::expected_inactive_version, - )) - .execute_async(&conn) - .await?; - if count != 1 { - // This should be impossible in practice. We will insert - // however many rows matched the `baseboard_id` parts of - // the query above. It can't be more than one 1 because - // we've filtered on a pair of columns that are unique - // together. It could only be 0 if the baseboard id had - // never been seen before in an inventory collection. But - // in that case, how did we manage to construct a blueprint - // with it? + .values(selection) + .into_columns(( + update_dsl::blueprint_id, + update_dsl::hw_baseboard_id, + update_dsl::sp_type, + update_dsl::sp_slot, + update_dsl::artifact_sha256, + update_dsl::artifact_version, + update_dsl::expected_active_version, + update_dsl::expected_inactive_version, + )) + .execute_async(&conn) + .await?; + if count != 1 { + // This should be impossible in practice. We will + // insert however many rows matched the `baseboard_id` + // parts of the query above. It can't be more than one + // 1 because we've filtered on a pair of columns that + // are unique together. It could only be 0 if the + // baseboard id had never been seen before in an + // inventory collection. But in that case, how did we + // manage to construct a blueprint with it? + // + // This could happen in the test suite or with + // `reconfigurator-cli`, which both let you create any + // blueprint you like. In the test suite, the test just + // has to deal with this behavior (e.g., by inserting an + // inventory collection containing this SP). With + // `reconfigurator-cli`, this amounts to user error. + error!(&opctx.log, + "blueprint insertion: unexpectedly tried to insert \ + wrong number of rows into \ + bp_pending_mgs_update_sp (aborting transaction)"; + "count" => count, + &update.baseboard_id, + ); + return Err(TxnError::BadInsertCount { + table_name: "bp_pending_mgs_update_sp", + count, + baseboard_id: update.baseboard_id.clone(), + }); + } + + // This statement is just here to force a compilation error + // if the set of columns in `bp_pending_mgs_update_sp` + // changes because that will affect the correctness of the + // above statement. // - // This could happen in the test suite or with - // `reconfigurator-cli`, which both let you create any - // blueprint you like. In the test suite, the test just has - // to deal with this behavior (e.g., by inserting an - // inventory collection containing this SP). With - // `reconfigurator-cli`, this amounts to user error. - error!(&opctx.log, - "blueprint insertion: unexpectedly tried to insert wrong number of - rows into bp_pending_mgs_update_sp (aborting transaction)"; - "count" => count, - &update.baseboard_id, - ); - return Err(TxnError::BadInsertCount { - table_name: "bp_pending_mgs_update_sp", - count, - baseboard_id: update.baseboard_id.clone(), - }); + // If you're here because of a compile error, you might be + // changing the `bp_pending_mgs_update_sp` table. Update + // the statement below and be sure to update the code above, + // too! + let ( + _blueprint_id, + _hw_baseboard_id, + _sp_type, + _sp_slot, + _artifact_sha256, + _artifact_version, + _expected_active_version, + _expected_inactive_version, + ) = update_dsl::bp_pending_mgs_update_sp::all_columns(); } - // This statement is just here to force a compilation error - // if the set of columns in `bp_pending_mgs_update_sp` - // changes because that will affect the correctness of the - // above statement. - // - // If you're here because of a compile error, you might be - // changing the `bp_pending_mgs_update_sp` table. Update - // the statement below and be sure to update the code above, - // too! - let ( - _blueprint_id, - _hw_baseboard_id, - _sp_type, - _sp_slot, - _artifact_sha256, - _artifact_version, - _expected_active_version, - _expected_inactive_version, - ) = update_dsl::bp_pending_mgs_update_sp::all_columns(); - } - - Ok(()) - - }) - .await - .map_err(|e| match e { - TxnError::Diesel(e) => public_error_from_diesel(e, ErrorHandler::Server), - e @ TxnError::BadInsertCount { .. } => { - // This variant is always an internal error and has no causes so - // we don't need to use InlineErrorChain here. - Error::internal_error(&e.to_string()) - } - })?; + Ok(()) + }) + .await + .map_err(|e| match e { + TxnError::Diesel(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + e @ TxnError::BadInsertCount { .. } => { + // This variant is always an internal error and has no + // causes so we don't need to use InlineErrorChain here. + Error::internal_error(&e.to_string()) + } + })?; info!( &opctx.log, From 31a882c6e62d058c16271766425bcf6612b7152a Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 08:55:33 -0700 Subject: [PATCH 04/14] rustfmt bails out in blueprint_insert_on_connection() --- .../db-queries/src/db/datastore/deployment.rs | 181 +++++++++++------- 1 file changed, 110 insertions(+), 71 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 93a71bec015..961af8aa460 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -320,101 +320,140 @@ impl DataStore { #[allow(clippy::disallowed_methods)] self.transaction_non_retry_wrapper("blueprint_insert") .transaction(&conn, |conn| async move { - // Insert the row for the blueprint. - { - use nexus_db_schema::schema::blueprint::dsl; - let _: usize = diesel::insert_into(dsl::blueprint) - .values(row_blueprint) - .execute_async(&conn) - .await?; - } - - // Insert all the sled states for this blueprint. - { - use nexus_db_schema::schema::bp_sled_metadata::dsl as sled_metadata; + // Insert the row for the blueprint. + { + use nexus_db_schema::schema::blueprint::dsl; + let _: usize = diesel::insert_into(dsl::blueprint) + .values(row_blueprint) + .execute_async(&conn) + .await?; + } - let _ = diesel::insert_into(sled_metadata::bp_sled_metadata) - .values(sled_metadatas) - .execute_async(&conn) - .await?; - } + // Insert all the sled states for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_sled_metadata::dsl + as sled_metadata; + + let _ = + diesel::insert_into(sled_metadata::bp_sled_metadata) + .values(sled_metadatas) + .execute_async(&conn) + .await?; + } - // Insert all physical disks for this blueprint. - { - use nexus_db_schema::schema::bp_omicron_physical_disk::dsl as omicron_disk; - let _ = diesel::insert_into(omicron_disk::bp_omicron_physical_disk) + // Insert all physical disks for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_physical_disk::dsl + as omicron_disk; + let _ = diesel::insert_into( + omicron_disk::bp_omicron_physical_disk, + ) .values(omicron_physical_disks) .execute_async(&conn) .await?; - } + } - // Insert all datasets for this blueprint. - { - use nexus_db_schema::schema::bp_omicron_dataset::dsl as omicron_dataset; - let _ = diesel::insert_into(omicron_dataset::bp_omicron_dataset) + // Insert all datasets for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_dataset::dsl + as omicron_dataset; + let _ = diesel::insert_into( + omicron_dataset::bp_omicron_dataset, + ) .values(omicron_datasets) .execute_async(&conn) .await?; - } - - // Insert all the Omicron zones for this blueprint. - { - use nexus_db_schema::schema::bp_omicron_zone::dsl as omicron_zone; - let _ = diesel::insert_into(omicron_zone::bp_omicron_zone) - .values(omicron_zones) - .execute_async(&conn) - .await?; - } + } - { - use nexus_db_schema::schema::bp_omicron_zone_nic::dsl as omicron_zone_nic; - let _ = - diesel::insert_into(omicron_zone_nic::bp_omicron_zone_nic) - .values(omicron_zone_nics) + // Insert all the Omicron zones for this blueprint. + { + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_zone::dsl + as omicron_zone; + let _ = diesel::insert_into(omicron_zone::bp_omicron_zone) + .values(omicron_zones) .execute_async(&conn) .await?; - } - - // Insert all clickhouse cluster related tables if necessary - if let Some((clickhouse_cluster_config, keepers, servers)) = clickhouse_tables { - { - use nexus_db_schema::schema::bp_clickhouse_cluster_config::dsl; - let _ = diesel::insert_into(dsl::bp_clickhouse_cluster_config) - .values(clickhouse_cluster_config) - .execute_async(&conn) - .await?; } + { - use nexus_db_schema::schema::bp_clickhouse_keeper_zone_id_to_node_id::dsl; - let _ = diesel::insert_into(dsl::bp_clickhouse_keeper_zone_id_to_node_id) - .values(keepers) + // Skip formatting this line to prevent rustfmt bailing out. + #[rustfmt::skip] + use nexus_db_schema::schema::bp_omicron_zone_nic::dsl + as omicron_zone_nic; + let _ = diesel::insert_into( + omicron_zone_nic::bp_omicron_zone_nic, + ) + .values(omicron_zone_nics) .execute_async(&conn) .await?; } + + // Insert all clickhouse cluster related tables if necessary + if let Some((clickhouse_cluster_config, keepers, servers)) = + clickhouse_tables { - use nexus_db_schema::schema::bp_clickhouse_server_zone_id_to_node_id::dsl; - let _ = diesel::insert_into(dsl::bp_clickhouse_server_zone_id_to_node_id) - .values(servers) - .execute_async(&conn) - .await?; + { + // Skip formatting this line to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_clickhouse_cluster_config::dsl; + let _ = diesel::insert_into( + dsl::bp_clickhouse_cluster_config, + ) + .values(clickhouse_cluster_config) + .execute_async(&conn) + .await?; + } + { + // Skip formatting this line to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_clickhouse_keeper_zone_id_to_node_id::dsl; + let _ = diesel::insert_into( + dsl::bp_clickhouse_keeper_zone_id_to_node_id, + ) + .values(keepers) + .execute_async(&conn) + .await?; + } + { + // Skip formatting this line to prevent rustfmt bailing + // out. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_clickhouse_server_zone_id_to_node_id::dsl; + let _ = diesel::insert_into( + dsl::bp_clickhouse_server_zone_id_to_node_id, + ) + .values(servers) + .execute_async(&conn) + .await?; + } } - } - // Insert oximeter read policy for this blueprint - { - use nexus_db_schema::schema::bp_oximeter_read_policy::dsl; - let _ = - diesel::insert_into(dsl::bp_oximeter_read_policy) + // Insert oximeter read policy for this blueprint + { + use nexus_db_schema::schema::bp_oximeter_read_policy::dsl; + let _ = diesel::insert_into(dsl::bp_oximeter_read_policy) .values(oximeter_read_policy) .execute_async(&conn) .await?; - } - - Ok(()) + } - }) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; info!( &opctx.log, From dd3267aeefe58436619eb2d8793664a76ef31c5b Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 09:08:17 -0700 Subject: [PATCH 05/14] unnecessary diffs --- nexus/db-queries/src/db/datastore/deployment.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index a5a252beca0..a8165cea340 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -368,6 +368,7 @@ impl DataStore { #[rustfmt::skip] use nexus_db_schema::schema::bp_sled_metadata::dsl as sled_metadata; + let _ = diesel::insert_into(sled_metadata::bp_sled_metadata) .values(sled_metadatas) @@ -418,8 +419,8 @@ impl DataStore { { // Skip formatting this line to prevent rustfmt bailing out. #[rustfmt::skip] - use nexus_db_schema::schema::bp_omicron_zone_nic::dsl as - omicron_zone_nic; + use nexus_db_schema::schema::bp_omicron_zone_nic::dsl + as omicron_zone_nic; let _ = diesel::insert_into( omicron_zone_nic::bp_omicron_zone_nic, ) From 0c72f01718c5887191d6f4146fe39e718454a7a5 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 09:12:57 -0700 Subject: [PATCH 06/14] review feedback --- .../db-queries/src/db/datastore/deployment.rs | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index a8165cea340..ef033304e68 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -500,13 +500,20 @@ impl DataStore { for update in &blueprint.pending_mgs_updates { // Right now, we only implement support for storing SP // updates. - let PendingMgsUpdateDetails::Sp { - expected_active_version, - expected_inactive_version, - } = &update.details - else { - continue; - }; + let (expected_active_version, expected_inactive_version) = + match &update.details { + PendingMgsUpdateDetails::Sp { + expected_active_version, + expected_inactive_version, + } => ( + expected_active_version, + expected_inactive_version, + ), + PendingMgsUpdateDetails::Rot { .. } + | PendingMgsUpdateDetails::RotBootloader { + .. + } => continue, + }; // slot_ids fit into a u16 in practice. This will be enforced // at compile time shortly. See oxidecomputer/omicron#8378. From e63d9bd89cc7546664183cda4d1ab71c7770c8d8 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 09:22:43 -0700 Subject: [PATCH 07/14] fixes --- nexus/db-queries/src/db/datastore/deployment.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index ef033304e68..5dd5a1f6dee 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -525,10 +525,9 @@ impl DataStore { ); let db_sp_type = SpType::from(update.sp_type).into_sql::(); - let db_slot_id = SpMgsSlot::from(SqlU16::from( - u16::try_from(update_slot_id).unwrap(), - )) - .into_sql::(); + let db_slot_id = + SpMgsSlot::from(SqlU16::from(update_slot_id)) + .into_sql::(); let db_artifact_hash = ArtifactHash::from(update.artifact_hash) .into_sql::(); @@ -562,6 +561,7 @@ impl DataStore { .select(( db_blueprint_id, baseboard_dsl::id, + db_sp_type, db_slot_id, db_artifact_hash, db_artifact_version, From 1feb3c8c5edaa005b10c6f215e97baa493dd63de Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 09:27:52 -0700 Subject: [PATCH 08/14] more nits --- .../db-queries/src/db/datastore/deployment.rs | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 5dd5a1f6dee..f6711069fbe 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -484,9 +484,11 @@ impl DataStore { } // Insert pending MGS updates for service processors for this - // blueprint. These include foreign keys into the hw_baseboard_id - // table that we don't have handy. To achieve this, we use the same - // pattern used during inventory insertion: + // blueprint. These include foreign keys into the + // hw_baseboard_id table that we don't have handy. To achieve + // this, we use the same pattern used during inventory + // insertion: + // // INSERT INTO bp_pending_mgs_update_sp // SELECT // id @@ -494,8 +496,8 @@ impl DataStore { // FROM hw_baseboard_id // WHERE part_number = ... AND serial_number = ...; // - // This way, we don't need to know the id. The database looks it up - // for us as it does the INSERT. + // This way, we don't need to know the id. The database looks + // it up for us as it does the INSERT. for update in &blueprint.pending_mgs_updates { // Right now, we only implement support for storing SP @@ -515,8 +517,9 @@ impl DataStore { } => continue, }; - // slot_ids fit into a u16 in practice. This will be enforced - // at compile time shortly. See oxidecomputer/omicron#8378. + // slot_ids fit into a u16 in practice. This will be + // enforced at compile time shortly. See + // oxidecomputer/omicron#8378. let update_slot_id = u16::try_from(update.slot_id) .expect("slot id to fit into u16"); @@ -1220,7 +1223,6 @@ impl DataStore { // Collect the unique baseboard ids referenced by pending updates. let baseboard_id_ids: BTreeSet<_> = pending_updates_sp.iter().map(|s| s.hw_baseboard_id).collect(); - // XXX-dap could commonize with inventory // Fetch the corresponding baseboard records. let baseboards_by_id: BTreeMap<_, _> = { use nexus_db_schema::schema::hw_baseboard_id::dsl; @@ -1259,7 +1261,8 @@ impl DataStore { else { // This should be impossible. return Err(Error::internal_error(&format!( - "loading blueprint {}: missing baseboard that we should have fetched: {}", + "loading blueprint {}: missing baseboard that we should \ + have fetched: {}", blueprint_id, row.hw_baseboard_id ))); }; @@ -1268,7 +1271,8 @@ impl DataStore { if let Some(previous) = pending_mgs_updates.insert(update) { // This should be impossible. return Err(Error::internal_error(&format!( - "blueprint {}: found multiple pending updates for baseboard {:?}", + "blueprint {}: found multiple pending updates for \ + baseboard {:?}", blueprint_id, previous.baseboard_id ))); } From 948c6b6d6e1baf47b27733d4d8dedad7034c6261 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 09:32:08 -0700 Subject: [PATCH 09/14] add schema migration --- nexus/db-model/src/schema_versions.rs | 3 ++- schema/crdb/add-pending-mgs-updates/up.sql | 12 ++++++++++++ schema/crdb/dbinit.sql | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 schema/crdb/add-pending-mgs-updates/up.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index bd270fb4c78..034c666f91e 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(150, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(151, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(151, "add-pending-mgs-updates"), KnownVersion::new(150, "add-last-reconciliation-orphaned-datasets"), KnownVersion::new(149, "bp-add-target-release-min-gen"), KnownVersion::new(148, "clean-misplaced-m2s"), diff --git a/schema/crdb/add-pending-mgs-updates/up.sql b/schema/crdb/add-pending-mgs-updates/up.sql new file mode 100644 index 00000000000..58719a3e4cb --- /dev/null +++ b/schema/crdb/add-pending-mgs-updates/up.sql @@ -0,0 +1,12 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_pending_mgs_update_sp ( + blueprint_id UUID, + hw_baseboard_id UUID NOT NULL, + sp_slot INT4 NOT NULL, + artifact_sha256 STRING(64) NOT NULL, + artifact_version STRING(64) NOT NULL, + + expected_active_version STRING NOT NULL, + expected_inactive_version STRING, + + PRIMARY KEY(blueprint_id, hw_baseboard_id) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b443db4902b..5949510189b 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -5781,7 +5781,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '150.0.0', NULL) + (TRUE, NOW(), NOW(), '151.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 94e9c2c791a942bb42a8506b5be7fb937c4d1269 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 19 Jun 2025 10:44:34 -0700 Subject: [PATCH 10/14] fix brokenness --- schema/crdb/add-pending-mgs-updates/up.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/schema/crdb/add-pending-mgs-updates/up.sql b/schema/crdb/add-pending-mgs-updates/up.sql index 58719a3e4cb..28fa784d537 100644 --- a/schema/crdb/add-pending-mgs-updates/up.sql +++ b/schema/crdb/add-pending-mgs-updates/up.sql @@ -1,6 +1,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_pending_mgs_update_sp ( blueprint_id UUID, hw_baseboard_id UUID NOT NULL, + sp_type omicron.public.sp_type NOT NULL, sp_slot INT4 NOT NULL, artifact_sha256 STRING(64) NOT NULL, artifact_version STRING(64) NOT NULL, From 06b321541199c6c885f189f5234b32d26e233777 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 25 Jun 2025 19:31:09 -0700 Subject: [PATCH 11/14] fix deletion and make sure the test catches it --- .../db-queries/src/db/datastore/deployment.rs | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index e4be1bc8714..397cd16ce0b 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1327,6 +1327,7 @@ impl DataStore { nclickhouse_keepers, nclickhouse_servers, noximeter_policy, + npending_mgs_updates_sp, ) = self .transaction_retry_wrapper("blueprint_delete") .transaction(&conn, |conn| { @@ -1509,6 +1510,21 @@ impl DataStore { .await? }; + let npending_mgs_updates_sp = { + // Skip rustfmt because it bails out on this long line. + #[rustfmt::skip] + use nexus_db_schema::schema:: + bp_pending_mgs_update_sp::dsl; + diesel::delete( + dsl::bp_pending_mgs_update_sp.filter( + dsl::blueprint_id + .eq(to_db_typed_uuid(blueprint_id)), + ), + ) + .execute_async(&conn) + .await? + }; + Ok(( nblueprints, nsled_metadata, @@ -1520,6 +1536,7 @@ impl DataStore { nclickhouse_keepers, nclickhouse_servers, noximeter_policy, + npending_mgs_updates_sp, )) } }) @@ -1541,6 +1558,7 @@ impl DataStore { "nclickhouse_keepers" => nclickhouse_keepers, "nclickhouse_servers" => nclickhouse_servers, "noximeter_policy" => noximeter_policy, + "npending_mgs_updates_sp" => npending_mgs_updates_sp, ); Ok(()) @@ -2378,6 +2396,7 @@ mod tests { query_count!(bp_clickhouse_keeper_zone_id_to_node_id, blueprint_id), query_count!(bp_clickhouse_server_zone_id_to_node_id, blueprint_id), query_count!(bp_oximeter_read_policy, blueprint_id), + query_count!(bp_pending_mgs_update_sp, blueprint_id), ] { let count: i64 = result.unwrap(); assert_eq!( @@ -2881,6 +2900,34 @@ mod tests { [blueprint2.id] ); + // blueprint2 is more interesting in terms of containing a variety of + // different blueprint structures. We want to try deleting that. To do + // that, we have to create a new blueprint and make that one the target. + let blueprint3 = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint2, + &planning_input, + &collection, + "dummy", + ) + .expect("failed to create builder") + .build(); + datastore + .blueprint_insert(&opctx, &blueprint3) + .await + .expect("failed to insert blueprint"); + let bp3_target = BlueprintTarget { + target_id: blueprint3.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp3_target) + .await + .unwrap(); + datastore.blueprint_delete(&opctx, &authz_blueprint2).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint2.id).await; + // Clean up. db.terminate().await; logctx.cleanup_successful(); From afc003243ef1a31173125adc4b6e09cd30faf8e1 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 26 Jun 2025 17:34:45 -0700 Subject: [PATCH 12/14] Nexus must hang onto qorb resolvers used for MGS updates --- nexus/src/app/mod.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index c1916dc2763..de6b3c78110 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -269,6 +269,18 @@ pub struct Nexus { /// reports status of pending MGS-managed updates mgs_update_status_rx: watch::Receiver, + + /// DNS resolver used by MgsUpdateDriver for MGS + // We don't need to do anything with this, but we can't let it be dropped + // while Nexus is running. + #[allow(dead_code)] + mgs_resolver: Box, + + /// DNS resolver used by MgsUpdateDriver for Repo Depot + // We don't need to do anything with this, but we can't let it be dropped + // while Nexus is running. + #[allow(dead_code)] + repo_depot_resolver: Box, } impl Nexus { @@ -496,6 +508,8 @@ impl Nexus { )), tuf_artifact_replication_tx, mgs_update_status_rx, + mgs_resolver, + repo_depot_resolver, }; // TODO-cleanup all the extra Arcs here seems wrong From 1f900dbb4fc2b720c40961be3bacdc0bb534760c Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 27 Jun 2025 08:26:11 -0700 Subject: [PATCH 13/14] go buildomat go From 83c1a8f8eb6be3a2c2fdf33906dbc551e451df9c Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 30 Jun 2025 14:59:38 -0700 Subject: [PATCH 14/14] fix --- nexus/db-model/src/schema_versions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 25882b883af..20f01cee5fa 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(153, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(154, 0, 0); /// List of all past database schema versions, in *reverse* order ///