diff --git a/Cargo.lock b/Cargo.lock
index 3418cf459ef..0d30e296902 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6646,6 +6646,7 @@ dependencies = [
  "dropshot",
  "expectorate",
  "gateway-client",
+ "gateway-types",
  "id-map",
  "illumos-utils",
  "indexmap 2.9.0",
@@ -10552,6 +10553,7 @@ dependencies = [
  "datatest-stable",
  "dropshot",
  "expectorate",
+ "gateway-types",
  "humantime",
  "iddqd",
  "indent_write",
diff --git a/dev-tools/reconfigurator-cli/Cargo.toml b/dev-tools/reconfigurator-cli/Cargo.toml
index da04874dd4a..af80ae4f021 100644
--- a/dev-tools/reconfigurator-cli/Cargo.toml
+++ b/dev-tools/reconfigurator-cli/Cargo.toml
@@ -22,6 +22,7 @@ iddqd.workspace = true
 indent_write.workspace = true
 internal-dns-types.workspace = true
 itertools.workspace = true
+gateway-types.workspace = true
 newtype-uuid.workspace = true
 nexus-inventory.workspace = true
 nexus-reconfigurator-blippy.workspace = true
diff --git a/dev-tools/reconfigurator-cli/src/lib.rs b/dev-tools/reconfigurator-cli/src/lib.rs
index 75dfb0ddc31..d7df9784168 100644
--- a/dev-tools/reconfigurator-cli/src/lib.rs
+++ b/dev-tools/reconfigurator-cli/src/lib.rs
@@ -209,6 +209,7 @@ fn process_command(
         Commands::SledRemove(args) => cmd_sled_remove(sim, args),
         Commands::SledShow(args) => cmd_sled_show(sim, args),
         Commands::SledSetPolicy(args) => cmd_sled_set_policy(sim, args),
+        Commands::SledUpdateRot(args) => cmd_sled_update_rot(sim, args),
         Commands::SledUpdateSp(args) => cmd_sled_update_sp(sim, args),
         Commands::SiloList => cmd_silo_list(sim),
         Commands::SiloAdd(args) => cmd_silo_add(sim, args),
@@ -263,6 +264,8 @@ enum Commands {
     SledShow(SledArgs),
     /// set a sled's policy
     SledSetPolicy(SledSetPolicyArgs),
+    /// simulate updating the sled's RoT versions
+    SledUpdateRot(SledUpdateRotArgs),
     /// simulate updating the sled's SP versions
     SledUpdateSp(SledUpdateSpArgs),
 
@@ -390,6 +393,24 @@ struct SledUpdateSpArgs {
     inactive: Option<ExpectedVersion>,
 }
 
+#[derive(Debug, Args)]
+struct SledUpdateRotArgs {
+    /// id of the sled
+    sled_id: SledUuid,
+
+    /// sets the version reported for the RoT slot a
+    #[clap(long, required_unless_present_any = &["slot_b"])]
+    slot_a: Option<ExpectedVersion>,
+
+    /// sets the version reported for the RoT slot b
+    #[clap(long, required_unless_present_any = &["slot_a"])]
+    slot_b: Option<ExpectedVersion>,
+    // TODO: In a follow-up PR we could set other fields as well.
+    // They would be useful to simulate failures.
+    // These would be: active_slot, persistent_boot_preference,
+    // transient_boot_preference and pending_persistent_boot_preference.
+}
+
 #[derive(Debug, Args)]
 struct SledRemoveArgs {
     /// id of the sled
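[Note on the argument struct above: slot_a and slot_b are each optional on their own, but required_unless_present_any makes them at-least-one-of, which is what lets cmd_sled_update_rot assert later that at least one label exists. A minimal standalone sketch of the same clap pattern, with invented names and String stand-ins for the real version types:]

    // Standalone sketch of clap's at-least-one-of pattern, as used by
    // SledUpdateRotArgs above. Names and field types here are illustrative only.
    use clap::Parser;

    #[derive(Debug, Parser)]
    struct DemoArgs {
        /// stand-in for --slot-a
        #[clap(long, required_unless_present_any = &["slot_b"])]
        slot_a: Option<String>,

        /// stand-in for --slot-b
        #[clap(long, required_unless_present_any = &["slot_a"])]
        slot_b: Option<String>,
    }

    fn main() {
        // Rejected: neither flag was supplied.
        assert!(DemoArgs::try_parse_from(["demo"]).is_err());
        // Accepted: one of the two flags is enough.
        assert!(DemoArgs::try_parse_from(["demo", "--slot-a", "1.0.0"]).is_ok());
    }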
@@ -914,6 +935,15 @@ fn cmd_sled_show(
     let sled_id = args.sled_id;
     let sp_active_version = description.sled_sp_active_version(sled_id)?;
     let sp_inactive_version = description.sled_sp_inactive_version(sled_id)?;
+    let rot_active_slot = description.sled_rot_active_slot(sled_id)?;
+    let rot_slot_a_version = description.sled_rot_slot_a_version(sled_id)?;
+    let rot_slot_b_version = description.sled_rot_slot_b_version(sled_id)?;
+    let rot_persistent_boot_preference =
+        description.sled_rot_persistent_boot_preference(sled_id)?;
+    let rot_pending_persistent_boot_preference =
+        description.sled_rot_pending_persistent_boot_preference(sled_id)?;
+    let rot_transient_boot_preference =
+        description.sled_rot_transient_boot_preference(sled_id)?;
     let planning_input = description
         .to_planning_input_builder()
         .context("failed to generate planning_input builder")?
@@ -926,6 +956,24 @@ fn cmd_sled_show(
     swriteln!(s, "subnet {}", sled_resources.subnet.net());
     swriteln!(s, "SP active version: {:?}", sp_active_version);
     swriteln!(s, "SP inactive version: {:?}", sp_inactive_version);
+    swriteln!(s, "RoT active slot: {}", rot_active_slot);
+    swriteln!(s, "RoT slot A version: {:?}", rot_slot_a_version);
+    swriteln!(s, "RoT slot B version: {:?}", rot_slot_b_version);
+    swriteln!(
+        s,
+        "RoT persistent boot preference: {}",
+        rot_persistent_boot_preference
+    );
+    swriteln!(
+        s,
+        "RoT pending persistent boot preference: {:?}",
+        rot_pending_persistent_boot_preference
+    );
+    swriteln!(
+        s,
+        "RoT transient boot preference: {:?}",
+        rot_transient_boot_preference
+    );
     swriteln!(s, "zpools ({}):", sled_resources.zpools.len());
     for (zpool, disk) in &sled_resources.zpools {
         swriteln!(s, "  {:?}", zpool);
@@ -993,6 +1041,47 @@ fn cmd_sled_update_sp(
     )))
 }
 
+fn cmd_sled_update_rot(
+    sim: &mut ReconfiguratorSim,
+    args: SledUpdateRotArgs,
+) -> anyhow::Result<Option<String>> {
+    let mut labels = Vec::new();
+
+    if let Some(slot_a) = &args.slot_a {
+        labels.push(format!("slot a -> {}", slot_a));
+    }
+    if let Some(slot_b) = &args.slot_b {
+        labels.push(format!("slot b -> {}", slot_b));
+    }
+
+    assert!(
+        !labels.is_empty(),
+        "clap configuration requires that at least one argument is specified"
+    );
+
+    let mut state = sim.current_state().to_mut();
+    state.system_mut().description_mut().sled_update_rot_versions(
+        args.sled_id,
+        args.slot_a,
+        args.slot_b,
+    )?;
+
+    sim.commit_and_bump(
+        format!(
+            "reconfigurator-cli sled-update-rot: {}: {}",
+            args.sled_id,
+            labels.join(", "),
+        ),
+        state,
+    );
+
+    Ok(Some(format!(
+        "set sled {} RoT settings: {}",
+        args.sled_id,
+        labels.join(", ")
+    )))
+}
+
 fn cmd_inventory_list(
     sim: &mut ReconfiguratorSim,
 ) -> anyhow::Result<Option<String>> {
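[The sled-show output added above reports three RoT boot-preference fields: persistent, pending persistent, and transient. As a rough mental model only, and not the planner's or the RoT's actual code: the transient preference, when set, wins for a single boot, while a pending persistent preference is one that has been requested but not yet committed. A hedged sketch with invented names:]

    // Illustrative sketch only; the real decision is made by the RoT itself.
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum RotSlot {
        A,
        B,
    }

    // A transient preference (one boot only) overrides the persistent one;
    // otherwise the RoT falls back to its persistent boot preference. The
    // pending persistent preference does not affect the current boot.
    fn effective_boot_slot(persistent: RotSlot, transient: Option<RotSlot>) -> RotSlot {
        transient.unwrap_or(persistent)
    }

    fn main() {
        assert_eq!(effective_boot_slot(RotSlot::A, None), RotSlot::A);
        assert_eq!(effective_boot_slot(RotSlot::A, Some(RotSlot::B)), RotSlot::B);
    }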
diff --git a/dev-tools/reconfigurator-cli/tests/input/target-release.txt b/dev-tools/reconfigurator-cli/tests/input/target-release.txt
index 1e224e8f892..4984dacc555 100644
--- a/dev-tools/reconfigurator-cli/tests/input/target-release.txt
+++ b/dev-tools/reconfigurator-cli/tests/input/target-release.txt
@@ -28,7 +28,7 @@ sled-list
 blueprint-list
 inventory-list
 
-# First step: upgrade one SP.
+# First step: upgrade one RoT.
 blueprint-plan dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 f45ba181-4b56-42cc-a762-874d90184a43
 blueprint-diff dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1
 
@@ -36,33 +36,57 @@ blueprint-diff dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 8da82a8e-bf97-4fbd-8ddd-9f64
 blueprint-plan 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 f45ba181-4b56-42cc-a762-874d90184a43
 blueprint-diff 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 58d5e830-0884-47d8-a7cd-b2b3751adeb4
 
-# Now, update the simulated SP to reflect that the update completed.
+# Now, update the simulated RoT to reflect that the update completed.
 # Collect inventory from it and use that collection for another planning step.
 # This should report that the update completed, remove that update, and add one
-# for another sled.
-sled-update-sp 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active 1.0.0
+# for an SP on the same sled.
+sled-update-rot 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --slot-a 1.0.0
 inventory-generate
 blueprint-plan 58d5e830-0884-47d8-a7cd-b2b3751adeb4 eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51
 blueprint-diff 58d5e830-0884-47d8-a7cd-b2b3751adeb4 af934083-59b5-4bf6-8966-6fb5292c29e1
 
-# This time, make it more interesting. Change the inactive slot contents of
-# the simulated SP. This should make the configured update impossible and cause
-# the planner to fix it.
-sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --inactive 0.5.0
+# After the RoT update has completed, we update the simulated SP to reflect that
+# the update has completed as well.
+# Like before, collect inventory from it and use that collection for the next step.
+# This should report that the update completed, remove that update, and add one
+# for another sled.
+sled-update-sp 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active 1.0.0
 inventory-generate
 blueprint-plan af934083-59b5-4bf6-8966-6fb5292c29e1 61f451b3-2121-4ed6-91c7-a550054f6c21
 blueprint-diff af934083-59b5-4bf6-8966-6fb5292c29e1 df06bb57-ad42-4431-9206-abff322896c7
 
-# Now simulate the update completing successfully.
-# Another planning step should try to update the last sled.
-sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active 1.0.0
+# This time, make it more interesting. Change the inactive slot contents of
+# the simulated RoT. This should make the configured update impossible and cause
+# the planner to fix it.
+sled-update-rot 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --slot-b 0.5.0
 inventory-generate
 blueprint-plan df06bb57-ad42-4431-9206-abff322896c7 b1bda47d-2c19-4fba-96e3-d9df28db7436
 blueprint-diff df06bb57-ad42-4431-9206-abff322896c7 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba
 
-# Finish updating the last sled and do one more planning run.
-# There should be nothing left to do.
-sled-update-sp d81c6a84-79b8-4958-ae41-ea46c9b19763 --active 1.0.0
+# Now simulate the update completing successfully.
+# Like before, we should see a pending SP update for this sled.
+sled-update-rot 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --slot-a 1.0.0
 inventory-generate
 blueprint-plan 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba a71f7a73-35a6-45e8-acbe-f1c5925eed69
 blueprint-diff 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba 9034c710-3e57-45f3-99e5-4316145e87ac
+
+# Let's simulate the successful SP update as well.
+# Another couple of planning steps should try to update the last sled.
+sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active 1.0.0
+inventory-generate
+blueprint-plan 9034c710-3e57-45f3-99e5-4316145e87ac 0b5efbb3-0b1b-4bbf-b7d8-a2d6fca074c6
+blueprint-diff 9034c710-3e57-45f3-99e5-4316145e87ac d60afc57-f15d-476c-bd0f-b1071e2bb976
+
+# Update the RoT on the last sled.
+# There should be one last pending SP update.
+sled-update-rot d81c6a84-79b8-4958-ae41-ea46c9b19763 --slot-a 1.0.0
+inventory-generate
+blueprint-plan d60afc57-f15d-476c-bd0f-b1071e2bb976 78f72e8d-46a9-40a9-8618-602f54454d80
+blueprint-diff d60afc57-f15d-476c-bd0f-b1071e2bb976 a5a8f242-ffa5-473c-8efd-2acf2dc0b736
+
+# Finish updating the last sled and do one more planning run.
+# Now we should see there's nothing left to do!
+sled-update-sp d81c6a84-79b8-4958-ae41-ea46c9b19763 --active 1.0.0
+inventory-generate
+blueprint-plan a5a8f242-ffa5-473c-8efd-2acf2dc0b736 39363465-89ae-4ac2-9be1-099068da9d45
+blueprint-diff a5a8f242-ffa5-473c-8efd-2acf2dc0b736 626487fa-7139-45ec-8416-902271fc730b
\ No newline at end of file
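[The script above repeats one cadence per device: simulate the component update, regenerate inventory, plan against the new collection, and diff the resulting blueprint, with each sled's RoT updated before its SP. A hypothetical sketch of that loop; the Sim trait and every name below are invented for illustration, since the real driver is the command script itself:]

    // Invented driver mirroring the update-simulate-plan loop in the script.
    trait Sim {
        fn mark_component_updated(&mut self, sled: &str, component: &str);
        fn collect_inventory(&mut self) -> String; // returns a collection id
        fn plan(&mut self, parent_blueprint: &str, collection: &str) -> String;
    }

    fn converge(sim: &mut impl Sim, mut blueprint: String, sleds: &[&str]) -> String {
        for sled in sleds {
            // RoT first, then SP, matching the order the planner enforces here.
            for component in ["rot", "sp"] {
                sim.mark_component_updated(sled, component);
                let collection = sim.collect_inventory();
                blueprint = sim.plan(&blueprint, &collection);
            }
        }
        blueprint
    }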
diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
index 50c14f73d0a..419d05f4019 100644
--- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
+++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
@@ -42,6 +42,12 @@ serial serial1
 subnet fd00:1122:3344:102::/64
 SP active version: Some("0.0.1")
 SP inactive version: None
+RoT active slot: A
+RoT slot A version: Some("0.0.2")
+RoT slot B version: None
+RoT persistent boot preference: A
+RoT pending persistent boot preference: None
+RoT transient boot preference: None
 zpools (10):
   055c4910-b641-46d9-b52d-313aae9d9cbf (zpool)
     SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-055c4910-b641-46d9-b52d-313aae9d9cbf" }, disk_id: 6a0cb52f-5cc2-48a5-9f44-ac8dea3ac45b (physical_disk), policy: InService, state: Active }
@@ -403,6 +409,12 @@ serial serial0
 subnet fd00:1122:3344:101::/64
 SP active version: Some("0.0.1")
 SP inactive version: None
+RoT active slot: A
+RoT slot A version: Some("0.0.2")
+RoT slot B version: None
+RoT persistent boot preference: A
+RoT pending persistent boot preference: None
+RoT transient boot preference: None
 zpools (4):
   0477165a-a72e-4814-b8d6-74aa02cb2040 (zpool)
     SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0477165a-a72e-4814-b8d6-74aa02cb2040" }, disk_id: 6a5a31ab-4edc-44e0-a7a1-4190bfe582f7 (physical_disk), policy: InService, state: Active }
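[The stdout files in this diff are golden transcripts: the test harness replays a command script and compares the captured output against them. A hedged sketch of how such a comparison is commonly wired up with expectorate, which appears in the dev-dependencies added in Cargo.lock above; the run_cli helper and the input path are invented for illustration:]

    // Golden-file test sketch. Only expectorate::assert_contents is a real
    // API here; run_cli stands in for the actual test harness.
    fn run_cli(_input: &str) -> String {
        unimplemented!("stand-in for the real script runner")
    }

    #[test]
    fn cmds_example_golden() {
        let actual = run_cli("tests/input/cmds-example.txt");
        // Setting EXPECTORATE=overwrite regenerates the golden file.
        expectorate::assert_contents("tests/output/cmds-example-stdout", &actual);
    }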
Some("0.0.2") +RoT slot B version: None +RoT persistent boot preference: A +RoT pending persistent boot preference: None +RoT transient boot preference: None zpools (10): 674c6591-11be-44f2-9df1-db3bb663ec01 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-674c6591-11be-44f2-9df1-db3bb663ec01" }, disk_id: a52a7c57-7fd0-4139-8293-bda299523c53 (physical_disk), policy: InService, state: Active } @@ -141,6 +159,12 @@ serial serial0 subnet fd00:1122:3344:101::/64 SP active version: Some("3.0.0") SP inactive version: Some("2.0.0") +RoT active slot: A +RoT slot A version: Some("0.0.2") +RoT slot B version: None +RoT persistent boot preference: A +RoT pending persistent boot preference: None +RoT transient boot preference: None zpools (10): 674c6591-11be-44f2-9df1-db3bb663ec01 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-674c6591-11be-44f2-9df1-db3bb663ec01" }, disk_id: a52a7c57-7fd0-4139-8293-bda299523c53 (physical_disk), policy: InService, state: Active } @@ -173,6 +197,12 @@ serial serial0 subnet fd00:1122:3344:101::/64 SP active version: Some("4.0.0") SP inactive version: None +RoT active slot: A +RoT slot A version: Some("0.0.2") +RoT slot B version: None +RoT persistent boot preference: A +RoT pending persistent boot preference: None +RoT transient boot preference: None zpools (10): 674c6591-11be-44f2-9df1-db3bb663ec01 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-674c6591-11be-44f2-9df1-db3bb663ec01" }, disk_id: a52a7c57-7fd0-4139-8293-bda299523c53 (physical_disk), policy: InService, state: Active } @@ -205,6 +235,12 @@ serial serial0 subnet fd00:1122:3344:101::/64 SP active version: Some("4.0.0") SP inactive version: Some("5.0.0") +RoT active slot: A +RoT slot A version: Some("0.0.2") +RoT slot B version: None +RoT persistent boot preference: A +RoT pending persistent boot preference: None +RoT transient boot preference: None zpools (10): 674c6591-11be-44f2-9df1-db3bb663ec01 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-674c6591-11be-44f2-9df1-db3bb663ec01" }, disk_id: a52a7c57-7fd0-4139-8293-bda299523c53 (physical_disk), policy: InService, state: Active } @@ -269,6 +305,12 @@ serial serial0 subnet fd00:1122:3344:101::/64 SP active version: Some("4.0.0") SP inactive version: Some("5.0.0") +RoT active slot: A +RoT slot A version: Some("0.0.2") +RoT slot B version: None +RoT persistent boot preference: A +RoT pending persistent boot preference: None +RoT transient boot preference: None zpools (10): 674c6591-11be-44f2-9df1-db3bb663ec01 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-674c6591-11be-44f2-9df1-db3bb663ec01" }, disk_id: a52a7c57-7fd0-4139-8293-bda299523c53 (physical_disk), policy: InService, state: Active } diff --git a/dev-tools/reconfigurator-cli/tests/output/target-release-stdout b/dev-tools/reconfigurator-cli/tests/output/target-release-stdout index 45e8e87bc67..f40e7c218cc 100644 --- a/dev-tools/reconfigurator-cli/tests/output/target-release-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/target-release-stdout @@ -24,8 +24,8 @@ target release (generation 1): unset INFO extracting uploaded archive to INFO created directory to store extracted artifacts, path: INFO added artifact, name: SimGimletSp, kind: gimlet_sp, version: 1.0.0, hash: 
diff --git a/dev-tools/reconfigurator-cli/tests/output/target-release-stdout b/dev-tools/reconfigurator-cli/tests/output/target-release-stdout
index 45e8e87bc67..f40e7c218cc 100644
--- a/dev-tools/reconfigurator-cli/tests/output/target-release-stdout
+++ b/dev-tools/reconfigurator-cli/tests/output/target-release-stdout
@@ -24,8 +24,8 @@ target release (generation 1): unset
 INFO extracting uploaded archive to 
 INFO created directory to store extracted artifacts, path: 
 INFO added artifact, name: SimGimletSp, kind: gimlet_sp, version: 1.0.0, hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, length: 747
-INFO added artifact, name: fake-gimlet-rot, kind: gimlet_rot_image_a, version: 1.0.0, hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, length: 735
-INFO added artifact, name: fake-gimlet-rot, kind: gimlet_rot_image_b, version: 1.0.0, hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, length: 735
+INFO added artifact, name: SimRot, kind: gimlet_rot_image_a, version: 1.0.0, hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, length: 735
+INFO added artifact, name: SimRot, kind: gimlet_rot_image_b, version: 1.0.0, hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, length: 735
 INFO added artifact, name: fake-gimlet-rot-bootloader, kind: gimlet_rot_bootloader, version: 1.0.0, hash: 005ea358f1cd316df42465b1e3a0334ea22cc0c0442cf9ddf9b42fbf49780236, length: 750
 INFO added artifact, name: fake-host, kind: host_phase_1, version: 1.0.0, hash: 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89, length: 524288
 INFO added artifact, name: fake-host, kind: host_phase_2, version: 1.0.0, hash: f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008, length: 1048576
@@ -53,8 +53,8 @@ external DNS generations: 1
 target number of Nexus instances: default
 target release (generation 2): 1.0.0 (system-update-v1.0.0.zip)
     artifact: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670 gimlet_sp (SimGimletSp version 1.0.0)
-    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_a (fake-gimlet-rot version 1.0.0)
-    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_b (fake-gimlet-rot version 1.0.0)
+    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_a (SimRot version 1.0.0)
+    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_b (SimRot version 1.0.0)
     artifact: 005ea358f1cd316df42465b1e3a0334ea22cc0c0442cf9ddf9b42fbf49780236 gimlet_rot_bootloader (fake-gimlet-rot-bootloader version 1.0.0)
     artifact: 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89 host_phase_1 (fake-host version 1.0.0)
     artifact: f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008 host_phase_2 (fake-host version 1.0.0)
@@ -117,8 +117,8 @@ external DNS generations: 1
 target number of Nexus instances: default
 target release (generation 2): 1.0.0 (system-update-v1.0.0.zip)
     artifact: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670 gimlet_sp (SimGimletSp version 1.0.0)
-    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_a (fake-gimlet-rot version 1.0.0)
-    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_b (fake-gimlet-rot version 1.0.0)
+    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_a (SimRot version 1.0.0)
+    artifact: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a gimlet_rot_image_b (SimRot version 1.0.0)
     artifact: 005ea358f1cd316df42465b1e3a0334ea22cc0c0442cf9ddf9b42fbf49780236 gimlet_rot_bootloader (fake-gimlet-rot-bootloader version 1.0.0)
     artifact: 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89 host_phase_1 (fake-host version 1.0.0)
     artifact: f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008 host_phase_2 (fake-host version 1.0.0)
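[The artifact listing above registers a separate TUF artifact kind for each RoT image slot, gimlet_rot_image_a and gimlet_rot_image_b, even though both fake images share one hash here. A hedged sketch of that kind-to-slot correspondence; the kind strings are copied from the log, but the helper itself is invented:]

    // Invented helper illustrating the naming visible in the log above:
    // each RoT slot has its own image kind in the TUF repo.
    fn rot_slot_for_kind(kind: &str) -> Option<char> {
        match kind {
            "gimlet_rot_image_a" => Some('A'),
            "gimlet_rot_image_b" => Some('B'),
            _ => None, // not an RoT image kind
        }
    }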
@@ -156,7 +156,7 @@ ID NERRORS TIME_DONE
 f45ba181-4b56-42cc-a762-874d90184a43 0 
 
-> # First step: upgrade one SP.
+> # First step: upgrade one RoT.
 > blueprint-plan dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 f45ba181-4b56-42cc-a762-874d90184a43
 INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0
 INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1
@@ -168,7 +168,7 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
-INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
+INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
 INFO reached maximum number of pending SP updates, max: 1
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 based on parent blueprint dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21
@@ -193,10 +193,10 @@ to: blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1
 
  PENDING MGS UPDATES:
 
  Pending MGS-managed updates (all baseboards):
-    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
-    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-+   sled     0     model0       serial0        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
++   sled     0     model0       serial0        04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a  1.0.0             Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None }
 
 
 internal DNS:
@@ -350,7 +350,7 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
-INFO SP update not yet completed (will keep it), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
+INFO SP update not yet completed (will keep it), artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
 INFO reached maximum number of pending SP updates, max: 1
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 based on parent blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1
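[The planner logs and pending-update tables above render the RoT details through Rust's Debug impl. For readers following along, here is a hedged reconstruction of the shape those lines imply; the field names are taken verbatim from the output, but the real definitions live in the nexus/gateway type crates and may differ in detail:]

    // Inferred purely from the Debug output above; illustrative only.
    #[derive(Debug)]
    enum RotSlot { A, B }

    #[derive(Debug)]
    struct ArtifactVersion(String);

    #[derive(Debug)]
    enum ExpectedVersion {
        NoValidVersion,
        Version(ArtifactVersion),
    }

    #[derive(Debug)]
    struct ExpectedActiveRotSlot {
        // slot the RoT should currently be running from
        slot: RotSlot,
        // version expected in that slot before the update proceeds
        version: ArtifactVersion,
    }

    // The `Rot { ... }` details attached to a pending MGS-managed update.
    #[derive(Debug)]
    struct RotUpdateDetails {
        expected_active_slot: ExpectedActiveRotSlot,
        expected_inactive_version: ExpectedVersion,
        expected_persistent_boot_preference: RotSlot,
        expected_pending_persistent_boot_preference: Option<RotSlot>,
        expected_transient_boot_preference: Option<RotSlot>,
    }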
@@ -512,12 +512,12 @@ external DNS:
 
 
-> # Now, update the simulated SP to reflect that the update completed.
+> # Now, update the simulated RoT to reflect that the update completed.
 > # Collect inventory from it and use that collection for another planning step.
 > # This should report that the update completed, remove that update, and add one
-> # for another sled.
-> sled-update-sp 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active 1.0.0
-set sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 SP versions: active -> 1.0.0
+> # for an SP on the same sled.
+> sled-update-rot 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --slot-a 1.0.0
+set sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 RoT settings: slot a -> 1.0.0
 
 > inventory-generate
 generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configured sleds
 
@@ -533,9 +533,8 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
-INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
-INFO skipping board for SP update, serial_number: serial0, part_number: model0
-INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
+INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
 INFO reached maximum number of pending SP updates, max: 1
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4
@@ -560,11 +559,11 @@ to: blueprint af934083-59b5-4bf6-8966-6fb5292c29e1
 
  PENDING MGS UPDATES:
 
  Pending MGS-managed updates (all baseboards):
-    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
-    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--   sled     0     model0       serial0        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
-+   sled     1     model1       serial1        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*   sled     0     model0       serial0        - 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a  1.0.0           - Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None }
+                                               └─ + 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670              + Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
 
 
 internal DNS:
@@ -706,11 +705,13 @@ external DNS:
 
 
-> # This time, make it more interesting. Change the inactive slot contents of
-> # the simulated SP. This should make the configured update impossible and cause
-> # the planner to fix it.
-> sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --inactive 0.5.0
-set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c SP versions: inactive -> 0.5.0
+> # After the RoT update has completed, we update the simulated SP to reflect that
+> # the update has completed as well.
+> # Like before, collect inventory from it and use that collection for the next step.
+> # This should report that the update completed, remove that update, and add one
+> # for another sled.
+> sled-update-sp 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active 1.0.0
+set sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 SP versions: active -> 1.0.0
 
 > inventory-generate
 generated inventory collection 61f451b3-2121-4ed6-91c7-a550054f6c21 from configured sleds
 
@@ -726,8 +727,9 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
-INFO SP update impossible (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
-INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
+INFO skipping board for SP update, serial_number: serial0, part_number: model0
+INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
 INFO reached maximum number of pending SP updates, max: 1
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint df06bb57-ad42-4431-9206-abff322896c7 based on parent blueprint af934083-59b5-4bf6-8966-6fb5292c29e1
@@ -752,11 +754,11 @@ to: blueprint df06bb57-ad42-4431-9206-abff322896c7
 
  PENDING MGS UPDATES:
 
  Pending MGS-managed updates (all baseboards):
-    -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
-    -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-*   sled     1     model1       serial1        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             - Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
-                                               └─                                                                                  + Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: Version(ArtifactVersion("0.5.0")) }
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+-   sled     0     model0       serial0        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
++   sled     1     model1       serial1        04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a  1.0.0             Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None }
 
 
 internal DNS:
@@ -898,10 +900,11 @@ external DNS:
 
 
-> # Now simulate the update completing successfully.
-> # Another planning step should try to update the last sled.
-> sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active 1.0.0
-set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c SP versions: active -> 1.0.0
+> # This time, make it more interesting. Change the inactive slot contents of
+> # the simulated RoT. This should make the configured update impossible and cause
+> # the planner to fix it.
+> sled-update-rot 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --slot-b 0.5.0
+set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c RoT settings: slot b -> 0.5.0
 
 > inventory-generate
 generated inventory collection b1bda47d-2c19-4fba-96e3-d9df28db7436 from configured sleds
 
@@ -917,11 +920,9 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
-INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
-INFO skipping board for SP update, serial_number: serial1, part_number: model1
-INFO skipping board for SP update, serial_number: serial0, part_number: model0
-INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2
-INFO ran out of boards for SP update
+INFO SP update impossible (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: Version(ArtifactVersion("0.5.0")), component: rot, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO reached maximum number of pending SP updates, max: 1
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba based on parent blueprint df06bb57-ad42-4431-9206-abff322896c7
@@ -945,11 +946,11 @@ to: blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba
 
  PENDING MGS UPDATES:
 
  Pending MGS-managed updates (all baseboards):
-    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
-    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--   sled     1     model1       serial1        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: Version(ArtifactVersion("0.5.0")) }
-+   sled     2     model2       serial2        7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670  1.0.0             Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
+    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
+    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*   sled     1     model1       serial1        04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a  1.0.0             - Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None }
+                                               └─                                                                                  + Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None }
 
 
 internal DNS:
@@ -1091,10 +1092,10 @@ external DNS:
 
 
-> # Finish updating the last sled and do one more planning run.
-> # There should be nothing left to do.
-> sled-update-sp d81c6a84-79b8-4958-ae41-ea46c9b19763 --active 1.0.0
-set sled d81c6a84-79b8-4958-ae41-ea46c9b19763 SP versions: active -> 1.0.0
+> # Now simulate the update completing successfully.
+> # Like before, we should see a pending SP update for this sled.
+> sled-update-rot 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --slot-a 1.0.0
+set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c RoT settings: slot a -> 1.0.0
 
 > inventory-generate
 generated inventory collection a71f7a73-35a6-45e8-acbe-f1c5925eed69 from configured sleds
 
@@ -1110,12 +1111,9 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
-INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2
-INFO skipping board for SP update, serial_number: serial2, part_number: model2
-INFO skipping board for SP update, serial_number: serial0, part_number: model0
-INFO skipping board for SP update, serial_number: serial1, part_number: model1
-INFO ran out of boards for SP update
-INFO all zones up-to-date
+INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: Version(ArtifactVersion("0.5.0")), component: rot, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO reached maximum number of pending SP updates, max: 1
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint 9034c710-3e57-45f3-99e5-4316145e87ac based on parent blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba
@@ -1123,6 +1121,584 @@ generated blueprint 9034c710-3e57-45f3-99e5-4316145e87ac based on parent bluepri
 from: blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba
 to: blueprint 9034c710-3e57-45f3-99e5-4316145e87ac
 
+ COCKROACHDB SETTINGS:
+    state fingerprint::::::::::::::::: (none) (unchanged)
+    cluster.preserve_downgrade_option: (do not modify) (unchanged)
+
+ METADATA:
+    internal DNS version::: 1 (unchanged)
+    external DNS version::: 1 (unchanged)
+    target release min gen: 1 (unchanged)
+
+ OXIMETER SETTINGS:
+    generation: 1 (unchanged)
+    read from:: SingleNode (unchanged)
+
+ PENDING MGS UPDATES:
+
+ Pending MGS-managed updates (all baseboards):
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    sp_type  slot  part_number  serial_number  artifact_hash                                                     artifact_version  details
+    --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+*   sled     1     model1       serial1        - 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a  1.0.0           - Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None }
+                                               └─ + 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670              + Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion }
+
+
+internal DNS:
+  DNS zone: "control-plane.oxide.internal" (unchanged)
+    name: 058fd5f9-60a8-4e11-9302-15172782e17d.host (records: 1)
+      AAAA fd00:1122:3344:101::27
+    name: 0c71b3b2-6ceb-4e8f-b020-b08675e83038.host (records: 1)
+      AAAA fd00:1122:3344:101::22
+    name: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c.sled (records: 1)
+      AAAA fd00:1122:3344:102::1
+    name: 353b3b65-20f7-48c3-88f7-495bd5d31545.host (records: 1)
+      AAAA fd00:1122:3344:102::23
+    name: 3eeb8d49-eb1a-43f8-bb64-c2338421c2c6.host (records: 1)
+      AAAA fd00:1122:3344:103::22
+    name: 427ec88f-f467-42fa-9bbb-66a91a36103c.host (records: 1)
+      AAAA fd00:1122:3344:2::1
+    name: 466a9f29-62bf-4e63-924a-b9efdb86afec.host (records: 1)
+      AAAA fd00:1122:3344:102::22
+    name: 5199c033-4cf9-4ab6-8ae7-566bd7606363.host (records: 1)
+      AAAA fd00:1122:3344:101::25
+    name: 62620961-fc4a-481e-968b-f5acbac0dc63.host (records: 1)
+      AAAA fd00:1122:3344:102::21
+    name: 6444f8a5-6465-4f0b-a549-1993c113569c.host (records: 1)
+      AAAA fd00:1122:3344:101::21
+    name: 694bd14f-cb24-4be4-bb19-876e79cda2c8.host (records: 1)
+      AAAA fd00:1122:3344:103::26
+    name: 6c3ae381-04f7-41ea-b0ac-74db387dbc3a.host (records: 1)
+      AAAA fd00:1122:3344:102::24
+    name: 75b220ba-a0f4-4872-8202-dc7c87f062d0.host (records: 1)
+      AAAA fd00:1122:3344:103::24
+    name: 7c252b64-c5af-4ec1-989e-9a03f3b0f111.host (records: 1)
+      AAAA fd00:1122:3344:103::27
+    name: 803bfb63-c246-41db-b0da-d3b87ddfc63d.host (records: 1)
+      AAAA fd00:1122:3344:101::23
+    name: 86a22a56-0168-453d-9df1-cb2a7c64b5d3.host (records: 1)
+      AAAA fd00:1122:3344:102::28
+    name: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6.sled (records: 1)
+      AAAA fd00:1122:3344:101::1
+    name: 99e2f30b-3174-40bf-a78a-90da8abba8ca.host (records: 1)
+      AAAA fd00:1122:3344:1::1
+    name: @ (records: 3)
+      NS ns1.control-plane.oxide.internal
+      NS ns2.control-plane.oxide.internal
+      NS ns3.control-plane.oxide.internal
+    name: _clickhouse-admin-single-server._tcp (records: 1)
+      SRV port 8888 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal
+    name: _clickhouse-native._tcp (records: 1)
+      SRV port 9000 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal
+    name: _clickhouse._tcp (records: 1)
+      SRV port 8123 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal
+    name: _crucible-pantry._tcp (records: 3)
+      SRV port 17000 75b220ba-a0f4-4872-8202-dc7c87f062d0.host.control-plane.oxide.internal
+      SRV port 17000 ad6a3a03-8d0f-4504-99a4-cbf73d69b973.host.control-plane.oxide.internal
+      SRV port 17000 ba4994a8-23f9-4b1a-a84f-a08d74591389.host.control-plane.oxide.internal
+    name: _crucible._tcp.058fd5f9-60a8-4e11-9302-15172782e17d (records: 1)
+      SRV port 32345 058fd5f9-60a8-4e11-9302-15172782e17d.host.control-plane.oxide.internal
+    name: _crucible._tcp.5199c033-4cf9-4ab6-8ae7-566bd7606363 (records: 1)
+      SRV port 32345 5199c033-4cf9-4ab6-8ae7-566bd7606363.host.control-plane.oxide.internal
+    name: _crucible._tcp.694bd14f-cb24-4be4-bb19-876e79cda2c8 (records: 1)
+      SRV port 32345 694bd14f-cb24-4be4-bb19-876e79cda2c8.host.control-plane.oxide.internal
+    name: _crucible._tcp.7c252b64-c5af-4ec1-989e-9a03f3b0f111 (records: 1)
+      SRV port 32345 7c252b64-c5af-4ec1-989e-9a03f3b0f111.host.control-plane.oxide.internal
+    name: _crucible._tcp.86a22a56-0168-453d-9df1-cb2a7c64b5d3 (records: 1)
+      SRV port 32345 86a22a56-0168-453d-9df1-cb2a7c64b5d3.host.control-plane.oxide.internal
+    name: _crucible._tcp.bd354eef-d8a6-4165-9124-283fb5e46d77 (records: 1)
+      SRV port 32345 bd354eef-d8a6-4165-9124-283fb5e46d77.host.control-plane.oxide.internal
+    name: _crucible._tcp.dfac80b4-a887-430a-ae87-a4e065dba787 (records: 1)
+      SRV port 32345 dfac80b4-a887-430a-ae87-a4e065dba787.host.control-plane.oxide.internal
+    name: _crucible._tcp.e2fdefe7-95b2-4fd2-ae37-56929a06d58c (records: 1)
+      SRV port 32345 e2fdefe7-95b2-4fd2-ae37-56929a06d58c.host.control-plane.oxide.internal
+    name: _crucible._tcp.f55647d4-5500-4ad3-893a-df45bd50d622 (records: 1)
+      SRV port 32345 f55647d4-5500-4ad3-893a-df45bd50d622.host.control-plane.oxide.internal
+    name: _external-dns._tcp (records: 3)
+      SRV port 5353 6c3ae381-04f7-41ea-b0ac-74db387dbc3a.host.control-plane.oxide.internal
+      SRV port 5353 803bfb63-c246-41db-b0da-d3b87ddfc63d.host.control-plane.oxide.internal
+      SRV port 5353 f6ec9c67-946a-4da3-98d5-581f72ce8bf0.host.control-plane.oxide.internal
+    name: _internal-ntp._tcp (records: 3)
+      SRV port 123 62620961-fc4a-481e-968b-f5acbac0dc63.host.control-plane.oxide.internal
+      SRV port 123 6444f8a5-6465-4f0b-a549-1993c113569c.host.control-plane.oxide.internal
+      SRV port 123 f10a4fb9-759f-4a65-b25e-5794ad2d07d8.host.control-plane.oxide.internal
+    name: _nameservice._tcp (records: 3)
+      SRV port 5353 427ec88f-f467-42fa-9bbb-66a91a36103c.host.control-plane.oxide.internal
+      SRV port 5353 99e2f30b-3174-40bf-a78a-90da8abba8ca.host.control-plane.oxide.internal
+      SRV port 5353 ea5b4030-b52f-44b2-8d70-45f15f987d01.host.control-plane.oxide.internal
+    name: _nexus._tcp (records: 3)
+      SRV port 12221 0c71b3b2-6ceb-4e8f-b020-b08675e83038.host.control-plane.oxide.internal
+      SRV port 12221 3eeb8d49-eb1a-43f8-bb64-c2338421c2c6.host.control-plane.oxide.internal
+      SRV port 12221 466a9f29-62bf-4e63-924a-b9efdb86afec.host.control-plane.oxide.internal
+    name: _oximeter-reader._tcp (records: 1)
+      SRV port 9000 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal
+    name: _repo-depot._tcp (records: 3)
+      SRV port 12348 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c.sled.control-plane.oxide.internal
+      SRV port 12348 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6.sled.control-plane.oxide.internal
+      SRV port 12348 d81c6a84-79b8-4958-ae41-ea46c9b19763.sled.control-plane.oxide.internal
+    name: ad6a3a03-8d0f-4504-99a4-cbf73d69b973.host (records: 1)
+      AAAA fd00:1122:3344:102::25
+    name: ba4994a8-23f9-4b1a-a84f-a08d74591389.host (records: 1)
+      AAAA fd00:1122:3344:101::24
+    name: bd354eef-d8a6-4165-9124-283fb5e46d77.host (records: 1)
+      AAAA fd00:1122:3344:102::26
+    name: d81c6a84-79b8-4958-ae41-ea46c9b19763.sled (records: 1)
+      AAAA fd00:1122:3344:103::1
+    name: dfac80b4-a887-430a-ae87-a4e065dba787.host (records: 1)
+      AAAA fd00:1122:3344:101::26
+    name: e2fdefe7-95b2-4fd2-ae37-56929a06d58c.host (records: 1)
+      AAAA fd00:1122:3344:102::27
+    name: ea5b4030-b52f-44b2-8d70-45f15f987d01.host (records: 1)
+      AAAA fd00:1122:3344:3::1
+    name: f10a4fb9-759f-4a65-b25e-5794ad2d07d8.host (records: 1)
+      AAAA fd00:1122:3344:103::21
+    name: f55647d4-5500-4ad3-893a-df45bd50d622.host (records: 1)
+      AAAA fd00:1122:3344:103::25
+    name: f6ec9c67-946a-4da3-98d5-581f72ce8bf0.host (records: 1)
+      AAAA fd00:1122:3344:103::23
+    name: ns1 (records: 1)
+      AAAA fd00:1122:3344:1::1
+    name: ns2 (records: 1)
+      AAAA fd00:1122:3344:2::1
+    name: ns3 (records: 1)
+      AAAA fd00:1122:3344:3::1
+
+external DNS:
+  DNS zone: "oxide.example" (unchanged)
+    name: @ (records: 3)
+      NS ns1.oxide.example
+      NS ns2.oxide.example
+      NS ns3.oxide.example
+    name: example-silo.sys (records: 3)
+      A 192.0.2.2
+      A 192.0.2.3
+      A 192.0.2.4
+    name: ns1 (records: 1)
+      A 198.51.100.1
+    name: ns2 (records: 1)
+      A 198.51.100.2
+    name: ns3 (records: 1)
+      A 198.51.100.3
+
+
+
+
+> # Let's simulate the successful SP update as well.
+> # Another couple of planning steps should try to update the last sled.
+> sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active 1.0.0
+set sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c SP versions: active -> 1.0.0
+
+> inventory-generate
+generated inventory collection 0b5efbb3-0b1b-4bbf-b7d8-a2d6fca074c6 from configured sleds
+
+> blueprint-plan 9034c710-3e57-45f3-99e5-4316145e87ac 0b5efbb3-0b1b-4bbf-b7d8-a2d6fca074c6
+INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0
+INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1
+INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0
+INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0
+INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0
+INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3
+INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3
+INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
+INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
+INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
+INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
+INFO skipping board for SP update, serial_number: serial1, part_number: model1
+INFO skipping board for SP update, serial_number: serial0, part_number: model0
+INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2
+INFO ran out of boards for SP update
+INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
d60afc57-f15d-476c-bd0f-b1071e2bb976 based on parent blueprint 9034c710-3e57-45f3-99e5-4316145e87ac + +> blueprint-diff 9034c710-3e57-45f3-99e5-4316145e87ac d60afc57-f15d-476c-bd0f-b1071e2bb976 +from: blueprint 9034c710-3e57-45f3-99e5-4316145e87ac +to: blueprint d60afc57-f15d-476c-bd0f-b1071e2bb976 + + COCKROACHDB SETTINGS: + state fingerprint::::::::::::::::: (none) (unchanged) + cluster.preserve_downgrade_option: (do not modify) (unchanged) + + METADATA: + internal DNS version::: 1 (unchanged) + external DNS version::: 1 (unchanged) + target release min gen: 1 (unchanged) + + OXIMETER SETTINGS: + generation: 1 (unchanged) + read from:: SingleNode (unchanged) + + PENDING MGS UPDATES: + + Pending MGS-managed updates (all baseboards): + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + sp_type slot part_number serial_number artifact_hash artifact_version details + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +- sled 1 model1 serial1 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670 1.0.0 Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } ++ sled 2 model2 serial2 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a 1.0.0 Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None } + + +internal DNS: + DNS zone: "control-plane.oxide.internal" (unchanged) + name: 058fd5f9-60a8-4e11-9302-15172782e17d.host (records: 1) + AAAA fd00:1122:3344:101::27 + name: 0c71b3b2-6ceb-4e8f-b020-b08675e83038.host (records: 1) + AAAA fd00:1122:3344:101::22 + name: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c.sled (records: 1) + AAAA fd00:1122:3344:102::1 + name: 353b3b65-20f7-48c3-88f7-495bd5d31545.host (records: 1) + AAAA fd00:1122:3344:102::23 + name: 3eeb8d49-eb1a-43f8-bb64-c2338421c2c6.host (records: 1) + AAAA fd00:1122:3344:103::22 + name: 427ec88f-f467-42fa-9bbb-66a91a36103c.host (records: 1) + AAAA fd00:1122:3344:2::1 + name: 466a9f29-62bf-4e63-924a-b9efdb86afec.host (records: 1) + AAAA fd00:1122:3344:102::22 + name: 5199c033-4cf9-4ab6-8ae7-566bd7606363.host (records: 1) + AAAA fd00:1122:3344:101::25 + name: 62620961-fc4a-481e-968b-f5acbac0dc63.host (records: 1) + AAAA fd00:1122:3344:102::21 + name: 6444f8a5-6465-4f0b-a549-1993c113569c.host (records: 1) + AAAA fd00:1122:3344:101::21 + name: 694bd14f-cb24-4be4-bb19-876e79cda2c8.host (records: 1) + AAAA fd00:1122:3344:103::26 + name: 6c3ae381-04f7-41ea-b0ac-74db387dbc3a.host (records: 1) + AAAA fd00:1122:3344:102::24 + name: 75b220ba-a0f4-4872-8202-dc7c87f062d0.host (records: 1) + AAAA fd00:1122:3344:103::24 + name: 7c252b64-c5af-4ec1-989e-9a03f3b0f111.host (records: 1) + 
AAAA fd00:1122:3344:103::27 + name: 803bfb63-c246-41db-b0da-d3b87ddfc63d.host (records: 1) + AAAA fd00:1122:3344:101::23 + name: 86a22a56-0168-453d-9df1-cb2a7c64b5d3.host (records: 1) + AAAA fd00:1122:3344:102::28 + name: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6.sled (records: 1) + AAAA fd00:1122:3344:101::1 + name: 99e2f30b-3174-40bf-a78a-90da8abba8ca.host (records: 1) + AAAA fd00:1122:3344:1::1 + name: @ (records: 3) + NS ns1.control-plane.oxide.internal + NS ns2.control-plane.oxide.internal + NS ns3.control-plane.oxide.internal + name: _clickhouse-admin-single-server._tcp (records: 1) + SRV port 8888 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _clickhouse-native._tcp (records: 1) + SRV port 9000 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _clickhouse._tcp (records: 1) + SRV port 8123 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _crucible-pantry._tcp (records: 3) + SRV port 17000 75b220ba-a0f4-4872-8202-dc7c87f062d0.host.control-plane.oxide.internal + SRV port 17000 ad6a3a03-8d0f-4504-99a4-cbf73d69b973.host.control-plane.oxide.internal + SRV port 17000 ba4994a8-23f9-4b1a-a84f-a08d74591389.host.control-plane.oxide.internal + name: _crucible._tcp.058fd5f9-60a8-4e11-9302-15172782e17d (records: 1) + SRV port 32345 058fd5f9-60a8-4e11-9302-15172782e17d.host.control-plane.oxide.internal + name: _crucible._tcp.5199c033-4cf9-4ab6-8ae7-566bd7606363 (records: 1) + SRV port 32345 5199c033-4cf9-4ab6-8ae7-566bd7606363.host.control-plane.oxide.internal + name: _crucible._tcp.694bd14f-cb24-4be4-bb19-876e79cda2c8 (records: 1) + SRV port 32345 694bd14f-cb24-4be4-bb19-876e79cda2c8.host.control-plane.oxide.internal + name: _crucible._tcp.7c252b64-c5af-4ec1-989e-9a03f3b0f111 (records: 1) + SRV port 32345 7c252b64-c5af-4ec1-989e-9a03f3b0f111.host.control-plane.oxide.internal + name: _crucible._tcp.86a22a56-0168-453d-9df1-cb2a7c64b5d3 (records: 1) + SRV port 32345 86a22a56-0168-453d-9df1-cb2a7c64b5d3.host.control-plane.oxide.internal + name: _crucible._tcp.bd354eef-d8a6-4165-9124-283fb5e46d77 (records: 1) + SRV port 32345 bd354eef-d8a6-4165-9124-283fb5e46d77.host.control-plane.oxide.internal + name: _crucible._tcp.dfac80b4-a887-430a-ae87-a4e065dba787 (records: 1) + SRV port 32345 dfac80b4-a887-430a-ae87-a4e065dba787.host.control-plane.oxide.internal + name: _crucible._tcp.e2fdefe7-95b2-4fd2-ae37-56929a06d58c (records: 1) + SRV port 32345 e2fdefe7-95b2-4fd2-ae37-56929a06d58c.host.control-plane.oxide.internal + name: _crucible._tcp.f55647d4-5500-4ad3-893a-df45bd50d622 (records: 1) + SRV port 32345 f55647d4-5500-4ad3-893a-df45bd50d622.host.control-plane.oxide.internal + name: _external-dns._tcp (records: 3) + SRV port 5353 6c3ae381-04f7-41ea-b0ac-74db387dbc3a.host.control-plane.oxide.internal + SRV port 5353 803bfb63-c246-41db-b0da-d3b87ddfc63d.host.control-plane.oxide.internal + SRV port 5353 f6ec9c67-946a-4da3-98d5-581f72ce8bf0.host.control-plane.oxide.internal + name: _internal-ntp._tcp (records: 3) + SRV port 123 62620961-fc4a-481e-968b-f5acbac0dc63.host.control-plane.oxide.internal + SRV port 123 6444f8a5-6465-4f0b-a549-1993c113569c.host.control-plane.oxide.internal + SRV port 123 f10a4fb9-759f-4a65-b25e-5794ad2d07d8.host.control-plane.oxide.internal + name: _nameservice._tcp (records: 3) + SRV port 5353 427ec88f-f467-42fa-9bbb-66a91a36103c.host.control-plane.oxide.internal + SRV port 5353 99e2f30b-3174-40bf-a78a-90da8abba8ca.host.control-plane.oxide.internal + SRV port 5353 
ea5b4030-b52f-44b2-8d70-45f15f987d01.host.control-plane.oxide.internal + name: _nexus._tcp (records: 3) + SRV port 12221 0c71b3b2-6ceb-4e8f-b020-b08675e83038.host.control-plane.oxide.internal + SRV port 12221 3eeb8d49-eb1a-43f8-bb64-c2338421c2c6.host.control-plane.oxide.internal + SRV port 12221 466a9f29-62bf-4e63-924a-b9efdb86afec.host.control-plane.oxide.internal + name: _oximeter-reader._tcp (records: 1) + SRV port 9000 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _repo-depot._tcp (records: 3) + SRV port 12348 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c.sled.control-plane.oxide.internal + SRV port 12348 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6.sled.control-plane.oxide.internal + SRV port 12348 d81c6a84-79b8-4958-ae41-ea46c9b19763.sled.control-plane.oxide.internal + name: ad6a3a03-8d0f-4504-99a4-cbf73d69b973.host (records: 1) + AAAA fd00:1122:3344:102::25 + name: ba4994a8-23f9-4b1a-a84f-a08d74591389.host (records: 1) + AAAA fd00:1122:3344:101::24 + name: bd354eef-d8a6-4165-9124-283fb5e46d77.host (records: 1) + AAAA fd00:1122:3344:102::26 + name: d81c6a84-79b8-4958-ae41-ea46c9b19763.sled (records: 1) + AAAA fd00:1122:3344:103::1 + name: dfac80b4-a887-430a-ae87-a4e065dba787.host (records: 1) + AAAA fd00:1122:3344:101::26 + name: e2fdefe7-95b2-4fd2-ae37-56929a06d58c.host (records: 1) + AAAA fd00:1122:3344:102::27 + name: ea5b4030-b52f-44b2-8d70-45f15f987d01.host (records: 1) + AAAA fd00:1122:3344:3::1 + name: f10a4fb9-759f-4a65-b25e-5794ad2d07d8.host (records: 1) + AAAA fd00:1122:3344:103::21 + name: f55647d4-5500-4ad3-893a-df45bd50d622.host (records: 1) + AAAA fd00:1122:3344:103::25 + name: f6ec9c67-946a-4da3-98d5-581f72ce8bf0.host (records: 1) + AAAA fd00:1122:3344:103::23 + name: ns1 (records: 1) + AAAA fd00:1122:3344:1::1 + name: ns2 (records: 1) + AAAA fd00:1122:3344:2::1 + name: ns3 (records: 1) + AAAA fd00:1122:3344:3::1 + +external DNS: + DNS zone: "oxide.example" (unchanged) + name: @ (records: 3) + NS ns1.oxide.example + NS ns2.oxide.example + NS ns3.oxide.example + name: example-silo.sys (records: 3) + A 192.0.2.2 + A 192.0.2.3 + A 192.0.2.4 + name: ns1 (records: 1) + A 198.51.100.1 + name: ns2 (records: 1) + A 198.51.100.2 + name: ns3 (records: 1) + A 198.51.100.3 + + + + +> # Update the RoT on the last sled. +> # There should be one last pending SP update. 
+> sled-update-rot d81c6a84-79b8-4958-ae41-ea46c9b19763 --slot-a 1.0.0 +set sled d81c6a84-79b8-4958-ae41-ea46c9b19763 RoT settings: slot a -> 1.0.0 + +> inventory-generate +generated inventory collection 78f72e8d-46a9-40a9-8618-602f54454d80 from configured sleds + +> blueprint-plan d60afc57-f15d-476c-bd0f-b1071e2bb976 78f72e8d-46a9-40a9-8618-602f54454d80 +INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 +INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 +INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 +INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 +INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 +INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 +INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a, expected_transient_boot_preference: None, expected_pending_persistent_boot_preference: None, expected_persistent_boot_preference: A, expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, component: rot, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2 +INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2 +INFO reached maximum number of pending SP updates, max: 1 +INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify +generated blueprint a5a8f242-ffa5-473c-8efd-2acf2dc0b736 based on parent blueprint d60afc57-f15d-476c-bd0f-b1071e2bb976 + +> blueprint-diff d60afc57-f15d-476c-bd0f-b1071e2bb976 a5a8f242-ffa5-473c-8efd-2acf2dc0b736 +from: blueprint d60afc57-f15d-476c-bd0f-b1071e2bb976 +to: blueprint a5a8f242-ffa5-473c-8efd-2acf2dc0b736 + + COCKROACHDB SETTINGS: + state fingerprint::::::::::::::::: (none) (unchanged) + cluster.preserve_downgrade_option: (do not modify) (unchanged) + + METADATA: + internal DNS version::: 1 (unchanged) + external DNS version::: 1 (unchanged) + target release min gen: 1 (unchanged) + + OXIMETER SETTINGS: + generation: 1 (unchanged) + read from:: SingleNode (unchanged) + + PENDING MGS UPDATES: + + Pending MGS-managed updates (all baseboards): + --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + sp_type slot part_number serial_number artifact_hash artifact_version details + 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +* sled 2 model2 serial2 - 04e4a7fdb84acca92c8fd3235e26d64ea61bef8a5f98202589fd346989c5720a 1.0.0 - Rot { expected_active_slot: ExpectedActiveRotSlot { slot: A, version: ArtifactVersion("0.0.2") }, expected_inactive_version: NoValidVersion, expected_persistent_boot_preference: A, expected_pending_persistent_boot_preference: None, expected_transient_boot_preference: None } + └─ + 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670 + Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } + + +internal DNS: + DNS zone: "control-plane.oxide.internal" (unchanged) + name: 058fd5f9-60a8-4e11-9302-15172782e17d.host (records: 1) + AAAA fd00:1122:3344:101::27 + name: 0c71b3b2-6ceb-4e8f-b020-b08675e83038.host (records: 1) + AAAA fd00:1122:3344:101::22 + name: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c.sled (records: 1) + AAAA fd00:1122:3344:102::1 + name: 353b3b65-20f7-48c3-88f7-495bd5d31545.host (records: 1) + AAAA fd00:1122:3344:102::23 + name: 3eeb8d49-eb1a-43f8-bb64-c2338421c2c6.host (records: 1) + AAAA fd00:1122:3344:103::22 + name: 427ec88f-f467-42fa-9bbb-66a91a36103c.host (records: 1) + AAAA fd00:1122:3344:2::1 + name: 466a9f29-62bf-4e63-924a-b9efdb86afec.host (records: 1) + AAAA fd00:1122:3344:102::22 + name: 5199c033-4cf9-4ab6-8ae7-566bd7606363.host (records: 1) + AAAA fd00:1122:3344:101::25 + name: 62620961-fc4a-481e-968b-f5acbac0dc63.host (records: 1) + AAAA fd00:1122:3344:102::21 + name: 6444f8a5-6465-4f0b-a549-1993c113569c.host (records: 1) + AAAA fd00:1122:3344:101::21 + name: 694bd14f-cb24-4be4-bb19-876e79cda2c8.host (records: 1) + AAAA fd00:1122:3344:103::26 + name: 6c3ae381-04f7-41ea-b0ac-74db387dbc3a.host (records: 1) + AAAA fd00:1122:3344:102::24 + name: 75b220ba-a0f4-4872-8202-dc7c87f062d0.host (records: 1) + AAAA fd00:1122:3344:103::24 + name: 7c252b64-c5af-4ec1-989e-9a03f3b0f111.host (records: 1) + AAAA fd00:1122:3344:103::27 + name: 803bfb63-c246-41db-b0da-d3b87ddfc63d.host (records: 1) + AAAA fd00:1122:3344:101::23 + name: 86a22a56-0168-453d-9df1-cb2a7c64b5d3.host (records: 1) + AAAA fd00:1122:3344:102::28 + name: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6.sled (records: 1) + AAAA fd00:1122:3344:101::1 + name: 99e2f30b-3174-40bf-a78a-90da8abba8ca.host (records: 1) + AAAA fd00:1122:3344:1::1 + name: @ (records: 3) + NS ns1.control-plane.oxide.internal + NS ns2.control-plane.oxide.internal + NS ns3.control-plane.oxide.internal + name: _clickhouse-admin-single-server._tcp (records: 1) + SRV port 8888 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _clickhouse-native._tcp (records: 1) + SRV port 9000 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _clickhouse._tcp (records: 1) + SRV port 8123 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _crucible-pantry._tcp (records: 3) + SRV port 17000 75b220ba-a0f4-4872-8202-dc7c87f062d0.host.control-plane.oxide.internal + SRV port 17000 ad6a3a03-8d0f-4504-99a4-cbf73d69b973.host.control-plane.oxide.internal + SRV port 17000 ba4994a8-23f9-4b1a-a84f-a08d74591389.host.control-plane.oxide.internal + name: 
_crucible._tcp.058fd5f9-60a8-4e11-9302-15172782e17d (records: 1) + SRV port 32345 058fd5f9-60a8-4e11-9302-15172782e17d.host.control-plane.oxide.internal + name: _crucible._tcp.5199c033-4cf9-4ab6-8ae7-566bd7606363 (records: 1) + SRV port 32345 5199c033-4cf9-4ab6-8ae7-566bd7606363.host.control-plane.oxide.internal + name: _crucible._tcp.694bd14f-cb24-4be4-bb19-876e79cda2c8 (records: 1) + SRV port 32345 694bd14f-cb24-4be4-bb19-876e79cda2c8.host.control-plane.oxide.internal + name: _crucible._tcp.7c252b64-c5af-4ec1-989e-9a03f3b0f111 (records: 1) + SRV port 32345 7c252b64-c5af-4ec1-989e-9a03f3b0f111.host.control-plane.oxide.internal + name: _crucible._tcp.86a22a56-0168-453d-9df1-cb2a7c64b5d3 (records: 1) + SRV port 32345 86a22a56-0168-453d-9df1-cb2a7c64b5d3.host.control-plane.oxide.internal + name: _crucible._tcp.bd354eef-d8a6-4165-9124-283fb5e46d77 (records: 1) + SRV port 32345 bd354eef-d8a6-4165-9124-283fb5e46d77.host.control-plane.oxide.internal + name: _crucible._tcp.dfac80b4-a887-430a-ae87-a4e065dba787 (records: 1) + SRV port 32345 dfac80b4-a887-430a-ae87-a4e065dba787.host.control-plane.oxide.internal + name: _crucible._tcp.e2fdefe7-95b2-4fd2-ae37-56929a06d58c (records: 1) + SRV port 32345 e2fdefe7-95b2-4fd2-ae37-56929a06d58c.host.control-plane.oxide.internal + name: _crucible._tcp.f55647d4-5500-4ad3-893a-df45bd50d622 (records: 1) + SRV port 32345 f55647d4-5500-4ad3-893a-df45bd50d622.host.control-plane.oxide.internal + name: _external-dns._tcp (records: 3) + SRV port 5353 6c3ae381-04f7-41ea-b0ac-74db387dbc3a.host.control-plane.oxide.internal + SRV port 5353 803bfb63-c246-41db-b0da-d3b87ddfc63d.host.control-plane.oxide.internal + SRV port 5353 f6ec9c67-946a-4da3-98d5-581f72ce8bf0.host.control-plane.oxide.internal + name: _internal-ntp._tcp (records: 3) + SRV port 123 62620961-fc4a-481e-968b-f5acbac0dc63.host.control-plane.oxide.internal + SRV port 123 6444f8a5-6465-4f0b-a549-1993c113569c.host.control-plane.oxide.internal + SRV port 123 f10a4fb9-759f-4a65-b25e-5794ad2d07d8.host.control-plane.oxide.internal + name: _nameservice._tcp (records: 3) + SRV port 5353 427ec88f-f467-42fa-9bbb-66a91a36103c.host.control-plane.oxide.internal + SRV port 5353 99e2f30b-3174-40bf-a78a-90da8abba8ca.host.control-plane.oxide.internal + SRV port 5353 ea5b4030-b52f-44b2-8d70-45f15f987d01.host.control-plane.oxide.internal + name: _nexus._tcp (records: 3) + SRV port 12221 0c71b3b2-6ceb-4e8f-b020-b08675e83038.host.control-plane.oxide.internal + SRV port 12221 3eeb8d49-eb1a-43f8-bb64-c2338421c2c6.host.control-plane.oxide.internal + SRV port 12221 466a9f29-62bf-4e63-924a-b9efdb86afec.host.control-plane.oxide.internal + name: _oximeter-reader._tcp (records: 1) + SRV port 9000 353b3b65-20f7-48c3-88f7-495bd5d31545.host.control-plane.oxide.internal + name: _repo-depot._tcp (records: 3) + SRV port 12348 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c.sled.control-plane.oxide.internal + SRV port 12348 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6.sled.control-plane.oxide.internal + SRV port 12348 d81c6a84-79b8-4958-ae41-ea46c9b19763.sled.control-plane.oxide.internal + name: ad6a3a03-8d0f-4504-99a4-cbf73d69b973.host (records: 1) + AAAA fd00:1122:3344:102::25 + name: ba4994a8-23f9-4b1a-a84f-a08d74591389.host (records: 1) + AAAA fd00:1122:3344:101::24 + name: bd354eef-d8a6-4165-9124-283fb5e46d77.host (records: 1) + AAAA fd00:1122:3344:102::26 + name: d81c6a84-79b8-4958-ae41-ea46c9b19763.sled (records: 1) + AAAA fd00:1122:3344:103::1 + name: dfac80b4-a887-430a-ae87-a4e065dba787.host (records: 1) + AAAA fd00:1122:3344:101::26 + name: 
e2fdefe7-95b2-4fd2-ae37-56929a06d58c.host (records: 1) + AAAA fd00:1122:3344:102::27 + name: ea5b4030-b52f-44b2-8d70-45f15f987d01.host (records: 1) + AAAA fd00:1122:3344:3::1 + name: f10a4fb9-759f-4a65-b25e-5794ad2d07d8.host (records: 1) + AAAA fd00:1122:3344:103::21 + name: f55647d4-5500-4ad3-893a-df45bd50d622.host (records: 1) + AAAA fd00:1122:3344:103::25 + name: f6ec9c67-946a-4da3-98d5-581f72ce8bf0.host (records: 1) + AAAA fd00:1122:3344:103::23 + name: ns1 (records: 1) + AAAA fd00:1122:3344:1::1 + name: ns2 (records: 1) + AAAA fd00:1122:3344:2::1 + name: ns3 (records: 1) + AAAA fd00:1122:3344:3::1 + +external DNS: + DNS zone: "oxide.example" (unchanged) + name: @ (records: 3) + NS ns1.oxide.example + NS ns2.oxide.example + NS ns3.oxide.example + name: example-silo.sys (records: 3) + A 192.0.2.2 + A 192.0.2.3 + A 192.0.2.4 + name: ns1 (records: 1) + A 198.51.100.1 + name: ns2 (records: 1) + A 198.51.100.2 + name: ns3 (records: 1) + A 198.51.100.3 + + + + +> # Finish updating the last sled and do one more planning run. +> # Now we should see there's nothing left to do! +> sled-update-sp d81c6a84-79b8-4958-ae41-ea46c9b19763 --active 1.0.0 +set sled d81c6a84-79b8-4958-ae41-ea46c9b19763 SP versions: active -> 1.0.0 + +> inventory-generate +generated inventory collection 39363465-89ae-4ac2-9be1-099068da9d45 from configured sleds + +> blueprint-plan a5a8f242-ffa5-473c-8efd-2acf2dc0b736 39363465-89ae-4ac2-9be1-099068da9d45 +INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 +INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 +INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 +INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 +INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 +INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 +INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 +INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2 +INFO skipping board for SP update, serial_number: serial2, part_number: model2 +INFO skipping board for SP update, serial_number: serial0, part_number: model0 +INFO skipping board for SP update, serial_number: serial1, part_number: model1 +INFO ran out of boards for SP update +INFO all zones up-to-date +INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify +generated blueprint 626487fa-7139-45ec-8416-902271fc730b based on parent blueprint a5a8f242-ffa5-473c-8efd-2acf2dc0b736 + +> blueprint-diff a5a8f242-ffa5-473c-8efd-2acf2dc0b736 626487fa-7139-45ec-8416-902271fc730b +from: blueprint a5a8f242-ffa5-473c-8efd-2acf2dc0b736 +to: blueprint 626487fa-7139-45ec-8416-902271fc730b + COCKROACHDB SETTINGS: state fingerprint::::::::::::::::: (none) (unchanged) cluster.preserve_downgrade_option: (do not modify) (unchanged) diff --git a/gateway-types/src/rot.rs 
b/gateway-types/src/rot.rs index 44dd8b8d8d6..77b200b29ca 100644 --- a/gateway-types/src/rot.rs +++ b/gateway-types/src/rot.rs @@ -5,6 +5,7 @@ use daft::Diffable; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::fmt::Display; use std::str::FromStr; #[derive( @@ -200,6 +201,16 @@ impl RotSlot { } } +impl Display for RotSlot { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + RotSlot::A => "A", + RotSlot::B => "B", + }; + write!(f, "{s}") + } +} + impl FromStr for RotSlot { type Err = String; diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 756d7dd604c..9852af62fa5 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -13,6 +13,7 @@ chrono.workspace = true debug-ignore.workspace = true daft.workspace = true gateway-client.workspace = true +gateway-types.workspace = true id-map.workspace = true illumos-utils.workspace = true indexmap.workspace = true diff --git a/nexus/reconfigurator/planning/src/mgs_updates/mod.rs b/nexus/reconfigurator/planning/src/mgs_updates/mod.rs index ee3dc21f24f..5da7b302fe5 100644 --- a/nexus/reconfigurator/planning/src/mgs_updates/mod.rs +++ b/nexus/reconfigurator/planning/src/mgs_updates/mod.rs @@ -4,6 +4,8 @@ //! Facilities for making choices about MGS-managed updates +use gateway_types::rot::RotSlot; +use nexus_types::deployment::ExpectedActiveRotSlot; use nexus_types::deployment::ExpectedVersion; use nexus_types::deployment::PendingMgsUpdate; use nexus_types::deployment::PendingMgsUpdateDetails; @@ -17,7 +19,9 @@ use slog_error_chain::InlineErrorChain; use std::collections::BTreeSet; use std::sync::Arc; use thiserror::Error; +use tufaceous_artifact::ArtifactKind; use tufaceous_artifact::ArtifactVersion; +use tufaceous_artifact::ArtifactVersionError; use tufaceous_artifact::KnownArtifactKind; /// Generates a new set of `PendingMgsUpdates` based on: @@ -166,8 +170,12 @@ enum MgsUpdateStatusError { MissingSpInfo, #[error("no caboose found for active slot in inventory")] MissingActiveCaboose, + #[error("no RoT state found in inventory")] + MissingRotState, #[error("not yet implemented")] NotYetImplemented, + #[error("unable to parse input into ArtifactVersion: {0:?}")] + FailedArtifactVersionParse(ArtifactVersionError), } /// Determine the status of a single MGS update based on what's in inventory for @@ -208,8 +216,59 @@ fn mgs_update_status( found_inactive_version, )) } - PendingMgsUpdateDetails::Rot { .. } - | PendingMgsUpdateDetails::RotBootloader { .. 
} => { + PendingMgsUpdateDetails::Rot { + expected_active_slot, + expected_inactive_version, + expected_persistent_boot_preference, + expected_pending_persistent_boot_preference, + expected_transient_boot_preference, + } => { + let active_caboose_which = match &expected_active_slot.slot { + RotSlot::A => CabooseWhich::RotSlotA, + RotSlot::B => CabooseWhich::RotSlotB, + }; + + let Some(active_caboose) = + inventory.caboose_for(active_caboose_which, baseboard_id) + else { + return Err(MgsUpdateStatusError::MissingActiveCaboose); + }; + + let found_inactive_version = inventory + .caboose_for(active_caboose_which.toggled_slot(), baseboard_id) + .map(|c| c.caboose.version.as_ref()); + + let rot_state = inventory + .rots + .get(baseboard_id) + .ok_or(MgsUpdateStatusError::MissingRotState)?; + + let found_active_version = + ArtifactVersion::new(active_caboose.caboose.version.clone()) + .map_err(|e| { + MgsUpdateStatusError::FailedArtifactVersionParse(e) + })?; + + let found_active_slot = ExpectedActiveRotSlot { + slot: rot_state.active_slot, + version: found_active_version, + }; + + Ok(mgs_update_status_rot( + desired_version, + &expected_active_slot, + expected_inactive_version, + expected_persistent_boot_preference, + expected_pending_persistent_boot_preference, + expected_transient_boot_preference, + &found_active_slot, + found_inactive_version, + &rot_state.persistent_boot_preference, + &rot_state.pending_persistent_boot_preference, + &rot_state.transient_boot_preference, + )) + } + PendingMgsUpdateDetails::RotBootloader { .. } => { + return Err(MgsUpdateStatusError::NotYetImplemented); + } }; @@ -327,6 +386,120 @@ fn mgs_update_status_sp( } } +#[allow(clippy::too_many_arguments)] +fn mgs_update_status_rot( + desired_version: &ArtifactVersion, + expected_active_slot: &ExpectedActiveRotSlot, + expected_inactive_version: &ExpectedVersion, + expected_persistent_boot_preference: &RotSlot, + expected_pending_persistent_boot_preference: &Option<RotSlot>, + expected_transient_boot_preference: &Option<RotSlot>, + found_active_slot: &ExpectedActiveRotSlot, + found_inactive_version: Option<&str>, + found_persistent_boot_preference: &RotSlot, + found_pending_persistent_boot_preference: &Option<RotSlot>, + found_transient_boot_preference: &Option<RotSlot>, +) -> MgsUpdateStatus { + if &found_active_slot.version() == desired_version { + // If we find the desired version in the active slot, we're done. + return MgsUpdateStatus::Done; + } + + // The update hasn't completed. + // + // Check to make sure the contents of the active slot, persistent boot + // preference, pending persistent boot preference, and transient boot + // preference are still what they were when we configured this update. + // If not, then this update cannot proceed as currently configured. + // It will fail its precondition check.
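+ // Each comparison below checks one found value against its expected + // counterpart; any mismatch means the configured update can no longer + // proceed.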
+ if found_active_slot.version() != expected_active_slot.version() { + return MgsUpdateStatus::Impossible; + } + + if found_persistent_boot_preference != expected_persistent_boot_preference { + return MgsUpdateStatus::Impossible; + } + + if found_pending_persistent_boot_preference + != expected_pending_persistent_boot_preference + { + return MgsUpdateStatus::Impossible; + } + + if found_transient_boot_preference != expected_transient_boot_preference { + return MgsUpdateStatus::Impossible; + } + + // If either the found pending persistent boot preference or the found + // transient boot preference is set, then the update is not done. + if found_pending_persistent_boot_preference.is_some() + || found_transient_boot_preference.is_some() + { + return MgsUpdateStatus::NotDone; + } + + // If there is a mismatch between the found persistent boot preference + // and the found active slot then the update is not done. + // + // TODO: Alternatively, this could also mean a failed update. See + // https://github.com/oxidecomputer/omicron/issues/8414 for context + // about when we'll be able to know whether it's an ongoing update + // or an RoT in a failed state. + if found_persistent_boot_preference != &found_active_slot.slot { + // TODO-K: It's not clear whether this case should be `Impossible` instead. + return MgsUpdateStatus::NotDone; + } + + // Similarly, check the contents of the inactive slot to determine if it + // still matches what we saw when we configured this update. If not, then + // this update cannot proceed as currently configured. It will fail its + // precondition check. + // + // This logic is more complex than for the active slot because unlike the + // active slot, it's possible for both the found contents and the expected + // contents to be missing and that's not necessarily an error. + match (found_inactive_version, expected_inactive_version) { + (Some(_), ExpectedVersion::NoValidVersion) => { + // We expected nothing in the inactive slot, but found something. + MgsUpdateStatus::Impossible + } + (Some(found), ExpectedVersion::Version(expected)) => { + if found == expected.as_str() { + // We found something in the inactive slot that matches what we + // expected. + MgsUpdateStatus::NotDone + } else { + // We found something in the inactive slot that differs from + // what we expected. + MgsUpdateStatus::Impossible + } + } + (None, ExpectedVersion::Version(_)) => { + // We expected something in the inactive slot, but found nothing. + // This case is tricky because we can't tell from the inventory + // whether we transiently failed to fetch the caboose for some + // reason or whether the caboose is actually garbage. We choose to + // assume that it's actually garbage, which would mean that this + // update as-configured is impossible. This will cause us to + // generate a new update that expects garbage in the inactive slot. + // If we're right, great. If we're wrong, then *that* update will + // be impossible to complete, but we should fix this again if the + // transient error goes away. + // + // If we instead assumed that this was a transient error, we'd do + // nothing here instead. But if the caboose was really missing, + // then we'd get stuck forever waiting for something that would + // never happen. + MgsUpdateStatus::Impossible + } + (None, ExpectedVersion::NoValidVersion) => { + // We expected nothing in the inactive slot and found nothing there. + // No problem!
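+ // (Both sides agree the inactive slot is empty, so the precondition + // still holds; the update simply hasn't completed yet.)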
+ MgsUpdateStatus::NotDone + } + } +} + /// Determine if the given baseboard needs any MGS-driven update (e.g., update /// to its SP, RoT, etc.). If so, returns the update. If not, returns `None`. fn try_make_update( @@ -335,11 +508,14 @@ fn try_make_update( inventory: &Collection, current_artifacts: &TufRepoDescription, ) -> Option<PendingMgsUpdate> { - // TODO When we add support for planning RoT, RoT bootloader, and host OS + // TODO When we add support for planning RoT bootloader and host OS // updates, we'll try these in a hardcoded priority order until any of them // returns `Some`. The order is described in RFD 565 section "Update - // Sequence". For now, we only plan SP updates. - try_make_update_sp(log, baseboard_id, inventory, current_artifacts) + // Sequence". For now, we only plan SP and RoT updates. + try_make_update_rot(log, baseboard_id, inventory, current_artifacts) + .or_else(|| { + try_make_update_sp(log, baseboard_id, inventory, current_artifacts) + }) } /// Determine if the given baseboard needs an SP update and, if so, returns it. @@ -476,6 +652,179 @@ fn try_make_update_sp( }) } +/// Determine if the given baseboard needs an RoT update and, if so, returns it. +fn try_make_update_rot( + log: &slog::Logger, + baseboard_id: &Arc<BaseboardId>, + inventory: &Collection, + current_artifacts: &TufRepoDescription, +) -> Option<PendingMgsUpdate> { + let Some(sp_info) = inventory.sps.get(baseboard_id) else { + warn!( + log, + "cannot configure RoT update for board \ + (missing SP info from inventory)"; + baseboard_id + ); + return None; + }; + + let Some(rot_state) = inventory.rots.get(baseboard_id) else { + warn!( + log, + "cannot configure RoT update for board \ + (missing RoT state from inventory)"; + baseboard_id + ); + return None; + }; + + let active_slot = rot_state.active_slot; + + let active_caboose = match active_slot { + RotSlot::A => CabooseWhich::RotSlotA, + RotSlot::B => CabooseWhich::RotSlotB, + }; + + let Some(active_caboose) = + inventory.caboose_for(active_caboose, baseboard_id) + else { + warn!( + log, + "cannot configure RoT update for board \ + (missing active slot {active_slot} caboose from inventory)"; + baseboard_id, + ); + return None; + }; + + let Ok(expected_active_version) = active_caboose.caboose.version.parse() + else { + warn!( + log, + "cannot configure RoT update for board \ + (cannot parse current active version as an ArtifactVersion)"; + baseboard_id, + "found_version" => &active_caboose.caboose.version, + ); + return None; + }; + + let board = &active_caboose.caboose.board; + let matching_artifacts: Vec<_> = current_artifacts + .artifacts + .iter() + .filter(|a| { + // A matching RoT artifact will have: + // + // - "name" matching the board name (found above from caboose) + // - "kind" matching one of the known RoT kinds + + if a.id.name != *board { + return false; + } + + match active_slot { + RotSlot::A => { + let slot_a_artifacts = [ + ArtifactKind::GIMLET_ROT_IMAGE_A, + ArtifactKind::PSC_ROT_IMAGE_A, + ArtifactKind::SWITCH_ROT_IMAGE_A, + ]; + + if slot_a_artifacts.contains(&a.id.kind) { + return true; + } + } + RotSlot::B => { + let slot_b_artifacts = [ + ArtifactKind::GIMLET_ROT_IMAGE_B, + ArtifactKind::PSC_ROT_IMAGE_B, + ArtifactKind::SWITCH_ROT_IMAGE_B, + ]; + + if slot_b_artifacts.contains(&a.id.kind) { + return true; + } + } + } + + false + }) + .collect(); + if matching_artifacts.is_empty() { + warn!( + log, + "cannot configure RoT update for board (no matching artifact)"; + baseboard_id, + ); + return None; + } + + if matching_artifacts.len() > 1 { + // This should be impossible
unless we shipped a TUF repo with more + // than 1 artifact for the same board and slot. But it doesn't prevent + // us from picking one and proceeding. Make a note and proceed. + warn!(log, "found more than one matching artifact for RoT update"); + } + + let artifact = matching_artifacts[0]; + + // If the artifact's version matches what's deployed, then no update is + // needed. + if artifact.id.version == expected_active_version { + debug!(log, "no RoT update needed for board"; baseboard_id); + return None; + } + + let expected_active_slot = ExpectedActiveRotSlot { + slot: active_slot, + version: expected_active_version, + }; + + // Begin configuring an update. + let inactive_caboose = match active_slot.toggled() { + RotSlot::A => CabooseWhich::RotSlotA, + RotSlot::B => CabooseWhich::RotSlotB, + }; + + let expected_inactive_version = match inventory + .caboose_for(inactive_caboose, baseboard_id) + .map(|c| c.caboose.version.parse::<ArtifactVersion>()) + .transpose() + { + Ok(None) => ExpectedVersion::NoValidVersion, + Ok(Some(v)) => ExpectedVersion::Version(v), + Err(_) => { + warn!( + log, + "cannot configure RoT update for board \ + (found inactive slot contents but version was not valid)"; + baseboard_id + ); + return None; + } + }; + + Some(PendingMgsUpdate { + baseboard_id: baseboard_id.clone(), + sp_type: sp_info.sp_type, + slot_id: u32::from(sp_info.sp_slot), + details: PendingMgsUpdateDetails::Rot { + expected_active_slot, + expected_inactive_version, + expected_persistent_boot_preference: rot_state + .persistent_boot_preference, + expected_pending_persistent_boot_preference: rot_state + .pending_persistent_boot_preference, + expected_transient_boot_preference: rot_state + .transient_boot_preference, + }, + artifact_hash: artifact.hash, + artifact_version: artifact.id.version.clone(), + }) +} + #[cfg(test)] mod test { use crate::mgs_updates::plan_mgs_updates; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index bf065e95cdf..0a1dd5c8f8e 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -33,6 +33,7 @@ use nexus_types::deployment::ZpoolFilter; use nexus_types::external_api::views::PhysicalDiskPolicy; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; +use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -956,7 +957,8 @@ impl<'a> Planner<'a> { .all_sleds(SledFilter::SpsUpdatedByReconfigurator) .map(|(_sled_id, details)| &details.baseboard_id) .collect(); - let included_baseboards = + + let included_baseboards: BTreeSet<Arc<BaseboardId>> = self.inventory .sps .iter() diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 3ccce7e0588..e04fc2eebe6 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -469,6 +469,136 @@ impl SystemDescription { Ok(sled.sp_inactive_caboose().map(|c| c.version.as_ref())) } + pub fn sled_sp_state( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<Option<&(u32, SpState)>> { + let sled = self.sleds.get(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + Ok(sled.sp_state()) + } + + /// Update the RoT versions reported for a sled. + /// + /// Where `None` is provided, no changes are made.
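+ /// (Matches the `Option` semantics of the existing SP version setter: + /// `None` leaves that slot's reported version untouched.)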
+ pub fn sled_update_rot_versions( + &mut self, + sled_id: SledUuid, + slot_a_version: Option<ExpectedVersion>, + slot_b_version: Option<ExpectedVersion>, + ) -> anyhow::Result<&mut Self> { + let sled = self.sleds.get_mut(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + let sled = Arc::make_mut(sled); + sled.set_rot_versions(slot_a_version, slot_b_version); + Ok(self) + } + + pub fn sled_rot_active_slot( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<&RotSlot> { + let sp_state = self.sled_sp_state(sled_id)?; + sp_state + .ok_or_else(|| { + anyhow!("failed to retrieve SP state from sled id: {sled_id}") + }) + .and_then(|(_hw_slot, sp_state)| match &sp_state.rot { + RotState::V2 { active, .. } | RotState::V3 { active, .. } => { + Ok(active) + } + RotState::CommunicationFailed { message } => Err(anyhow!( + "failed to retrieve active RoT slot due to \ + communication failure: {message}" + )), + }) + } + + pub fn sled_rot_persistent_boot_preference( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<&RotSlot> { + let sp_state = self.sled_sp_state(sled_id)?; + sp_state + .ok_or_else(|| { + anyhow!("failed to retrieve SP state from sled id: {sled_id}") + }) + .and_then(|(_hw_slot, sp_state)| match &sp_state.rot { + RotState::V2 { persistent_boot_preference, .. } + | RotState::V3 { persistent_boot_preference, .. } => { + Ok(persistent_boot_preference) + } + RotState::CommunicationFailed { message } => Err(anyhow!( + "failed to retrieve persistent boot preference slot \ + due to communication failure: {message}" + )), + }) + } + + pub fn sled_rot_pending_persistent_boot_preference( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<&Option<RotSlot>> { + let sp_state = self.sled_sp_state(sled_id)?; + sp_state + .ok_or_else(|| { + anyhow!("failed to retrieve SP state from sled id: {sled_id}") + }) + .and_then(|(_hw_slot, sp_state)| match &sp_state.rot { + RotState::V2 { pending_persistent_boot_preference, .. } + | RotState::V3 { pending_persistent_boot_preference, .. } => { + Ok(pending_persistent_boot_preference) + } + RotState::CommunicationFailed { message } => Err(anyhow!( + "failed to retrieve pending persistent boot \ + preference slot due to communication failure: {message}" + )), + }) + } + + pub fn sled_rot_transient_boot_preference( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<&Option<RotSlot>> { + let sp_state = self.sled_sp_state(sled_id)?; + sp_state + .ok_or_else(|| { + anyhow!("failed to retrieve SP state from sled id: {sled_id}") + }) + .and_then(|(_hw_slot, sp_state)| match &sp_state.rot { + RotState::V2 { transient_boot_preference, .. } + | RotState::V3 { transient_boot_preference, .. } => { + Ok(transient_boot_preference) + } + RotState::CommunicationFailed { message } => Err(anyhow!( + "failed to retrieve transient boot preference slot \ + due to communication failure: {message}" + )), + }) + } + + pub fn sled_rot_slot_a_version( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<Option<&str>> { + let sled = self.sleds.get(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + Ok(sled.rot_slot_a_caboose().map(|c| c.version.as_ref())) + } + + pub fn sled_rot_slot_b_version( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<Option<&str>> { + let sled = self.sleds.get(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + Ok(sled.rot_slot_b_caboose().map(|c| c.version.as_ref())) + } + pub fn set_tuf_repo(&mut self, tuf_repo: TufRepoPolicy) { self.tuf_repo = tuf_repo; } @@ -561,6 +691,42 @@ impl SystemDescription { ) .context("recording SP inactive caboose")?; } + + if let Some(slot_a) = &s.rot_slot_a_caboose() { + builder + .found_caboose( + &baseboard_id, + CabooseWhich::RotSlotA, + "fake MGS 1", + SpComponentCaboose { + board: slot_a.board.clone(), + epoch: None, + git_commit: slot_a.git_commit.clone(), + name: slot_a.name.clone(), + sign: slot_a.sign.clone(), + version: slot_a.version.clone(), + }, + ) + .context("recording RoT slot a caboose")?; + } + + if let Some(slot_b) = &s.rot_slot_b_caboose() { + builder + .found_caboose( + &baseboard_id, + CabooseWhich::RotSlotB, + "fake MGS 1", + SpComponentCaboose { + board: slot_b.board.clone(), + epoch: None, + git_commit: slot_b.git_commit.clone(), + name: slot_b.name.clone(), + sign: slot_b.sign.clone(), + version: slot_b.version.clone(), + }, + ) + .context("recording RoT slot b caboose")?; + } } builder @@ -727,6 +893,8 @@ pub struct SledHwInventory<'a> { pub rot: &'a nexus_types::inventory::RotState, pub sp_active: Option<Arc<Caboose>>, pub sp_inactive: Option<Arc<Caboose>>, + pub rot_slot_a: Option<Arc<Caboose>>, + pub rot_slot_b: Option<Arc<Caboose>>, } /// Our abstract description of a `Sled` @@ -743,6 +911,8 @@ pub struct Sled { resources: SledResources, sp_active_caboose: Option<Arc<Caboose>>, sp_inactive_caboose: Option<Arc<Caboose>>, + rot_slot_a_caboose: Option<Arc<Caboose>>, + rot_slot_b_caboose: Option<Arc<Caboose>>, } impl Sled { @@ -891,10 +1061,14 @@ impl Sled { }, state: SledState::Active, resources: SledResources { subnet: sled_subnet, zpools }, - sp_active_caboose: Some(Arc::new(Self::default_sp_caboose( - String::from("0.0.1"), - ))), + sp_active_caboose: Some(Arc::new( + Self::default_sp_component_caboose(String::from("0.0.1")), + )), sp_inactive_caboose: None, + rot_slot_a_caboose: Some(Arc::new( + Self::default_rot_component_caboose(String::from("0.0.2")), + )), + rot_slot_b_caboose: None, } } @@ -929,6 +1103,10 @@ impl Sled { inventory_sp.as_ref().and_then(|hw| hw.sp_active.clone()); let sp_inactive_caboose = inventory_sp.as_ref().and_then(|hw| hw.sp_inactive.clone()); + let rot_slot_a_caboose = + inventory_sp.as_ref().and_then(|hw| hw.rot_slot_a.clone()); + let rot_slot_b_caboose = + inventory_sp.as_ref().and_then(|hw| hw.rot_slot_b.clone()); let inventory_sp = inventory_sp.map(|sledhw| { // RotStateV3 unconditionally sets all of these let sp_state = if sledhw.rot.slot_a_sha3_256_digest.is_some() @@ -1039,6 +1217,8 @@ impl Sled { resources: sled_resources, sp_active_caboose, sp_inactive_caboose, + rot_slot_a_caboose, + rot_slot_b_caboose, } } @@ -1069,6 +1249,14 @@ impl Sled { &self.inventory_sled_agent } + fn rot_slot_a_caboose(&self) -> Option<&Caboose> { + self.rot_slot_a_caboose.as_deref() + } + + fn
rot_slot_b_caboose(&self) -> Option<&Caboose> { + self.rot_slot_b_caboose.as_deref() + } + fn sp_active_caboose(&self) -> Option<&Caboose> { self.sp_active_caboose.as_deref() } @@ -1092,7 +1280,7 @@ impl Sled { Arc::make_mut(caboose).version = active_version.to_string() } new @ None => { - *new = Some(Arc::new(Self::default_sp_caboose( + *new = Some(Arc::new(Self::default_sp_component_caboose( active_version.to_string(), ))); } @@ -1110,9 +1298,11 @@ impl Sled { Arc::make_mut(caboose).version = v.to_string() } new @ None => { - *new = Some(Arc::new(Self::default_sp_caboose( - v.to_string(), - ))); + *new = Some(Arc::new( + Self::default_sp_component_caboose( + v.to_string(), + ), + )); } } } @@ -1120,7 +1310,61 @@ impl Sled { } } - fn default_sp_caboose(version: String) -> Caboose { + /// Update the reported RoT versions + /// + /// If either field is `None`, that field is _unchanged_. + // Note that passing `Some(ExpectedVersion::NoValidVersion)` clears the + // caboose for that slot. + fn set_rot_versions( + &mut self, + slot_a_version: Option<ExpectedVersion>, + slot_b_version: Option<ExpectedVersion>, + ) { + if let Some(slot_a_version) = slot_a_version { + match slot_a_version { + ExpectedVersion::NoValidVersion => { + self.rot_slot_a_caboose = None; + } + ExpectedVersion::Version(v) => { + match &mut self.rot_slot_a_caboose { + Some(caboose) => { + Arc::make_mut(caboose).version = v.to_string() + } + new @ None => { + *new = Some(Arc::new( + Self::default_rot_component_caboose( + v.to_string(), + ), + )); + } + } + } + } + } + + if let Some(slot_b_version) = slot_b_version { + match slot_b_version { + ExpectedVersion::NoValidVersion => { + self.rot_slot_b_caboose = None; + } + ExpectedVersion::Version(v) => { + match &mut self.rot_slot_b_caboose { + Some(caboose) => { + Arc::make_mut(caboose).version = v.to_string() + } + new @ None => { + *new = Some(Arc::new( + Self::default_rot_component_caboose( + v.to_string(), + ), + )); + } + } + } + } + } + } + + fn default_sp_component_caboose(version: String) -> Caboose { let board = sp_sim::SIM_GIMLET_BOARD.to_string(); Caboose { board: board.clone(), @@ -1130,6 +1374,17 @@ impl Sled { sign: None, } } + + fn default_rot_component_caboose(version: String) -> Caboose { + let board = sp_sim::SIM_ROT_BOARD.to_string(); + Caboose { + board: board.clone(), + git_commit: String::from("unknown"), + name: board, + version: version.to_string(), + sign: None, + } + } } #[derive(Clone, Copy, Debug)] diff --git a/nexus/reconfigurator/simulation/src/system.rs b/nexus/reconfigurator/simulation/src/system.rs index 91af1c5d7b8..9f998d551a9 100644 --- a/nexus/reconfigurator/simulation/src/system.rs +++ b/nexus/reconfigurator/simulation/src/system.rs @@ -705,6 +705,12 @@ impl SimSystemBuilderInner { let sp_inactive = primary_collection .caboose_for(CabooseWhich::SpSlot1, baseboard_id) .map(|c| c.caboose.clone()); + let rot_slot_a = primary_collection + .caboose_for(CabooseWhich::RotSlotA, baseboard_id) + .map(|c| c.caboose.clone()); + let rot_slot_b = primary_collection + .caboose_for(CabooseWhich::RotSlotB, baseboard_id) + .map(|c| c.caboose.clone()); if let (Some(inv_sp), Some(inv_rot)) = (inv_sp, inv_rot) { Some(SledHwInventory { baseboard_id: &baseboard_id, @@ -712,6 +718,8 @@ rot: inv_rot, sp_active, sp_inactive, + rot_slot_a, + rot_slot_b, }) } else { None diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 81b21034e9e..a6c9ae7a3d5 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -369,6 +369,19 @@ pub enum CabooseWhich { Stage0Next, }
+impl CabooseWhich { + pub fn toggled_slot(&self) -> Self { + match self { + CabooseWhich::RotSlotA => CabooseWhich::RotSlotB, + CabooseWhich::RotSlotB => CabooseWhich::RotSlotA, + CabooseWhich::SpSlot0 => CabooseWhich::SpSlot1, + CabooseWhich::SpSlot1 => CabooseWhich::SpSlot0, + CabooseWhich::Stage0 => CabooseWhich::Stage0Next, + CabooseWhich::Stage0Next => CabooseWhich::Stage0, + } + } +} + /// Root of trust page contents found during a collection /// /// These are normalized in the database. Each distinct `RotPage` is assigned a diff --git a/update-common/manifests/fake.toml b/update-common/manifests/fake.toml index 705abf226ee..b0027227fda 100644 --- a/update-common/manifests/fake.toml +++ b/update-common/manifests/fake.toml @@ -10,7 +10,7 @@ version = "1.0.0" source = { kind = "fake", size = "1MiB" } [[artifact.gimlet_rot]] -name = "fake-gimlet-rot" +name = "SimRot" version = "1.0.0" [artifact.gimlet_rot.source] kind = "composite-rot"
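To see the slot-pairing behavior this change relies on in isolation, here is a minimal, self-contained sketch. The enums below are local stand-ins for the real `nexus_types::inventory::CabooseWhich` (names reused for clarity); only the `toggled_slot` pairing mirrors the diff above.

    // Standalone sketch; compile with `rustc` and run directly.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum CabooseWhich { RotSlotA, RotSlotB, SpSlot0, SpSlot1, Stage0, Stage0Next }

    impl CabooseWhich {
        // Same pairing as the toggled_slot() added to nexus/types/src/inventory.rs:
        // each slot maps to its partner in the A/B (or stage0/stage0next) pair.
        fn toggled_slot(&self) -> Self {
            match self {
                CabooseWhich::RotSlotA => CabooseWhich::RotSlotB,
                CabooseWhich::RotSlotB => CabooseWhich::RotSlotA,
                CabooseWhich::SpSlot0 => CabooseWhich::SpSlot1,
                CabooseWhich::SpSlot1 => CabooseWhich::SpSlot0,
                CabooseWhich::Stage0 => CabooseWhich::Stage0Next,
                CabooseWhich::Stage0Next => CabooseWhich::Stage0,
            }
        }
    }

    fn main() {
        // The RoT status check reads the inactive caboose via the toggle of
        // the active one, so the pairing must be symmetric: toggling twice
        // round-trips back to the starting slot.
        let active = CabooseWhich::RotSlotA;
        assert_eq!(active.toggled_slot(), CabooseWhich::RotSlotB);
        assert_eq!(active.toggled_slot().toggled_slot(), active);
    }

The same toggle drives `found_inactive_version` in `mgs_update_status` above, which is why an RoT with slot A active always reports slot B as its inactive caboose.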