From f5437f64d35e535975b813e640335903e644e24e Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 4 Sep 2024 04:30:39 +0000 Subject: [PATCH 01/42] wip: teach propolis-server to understand configurable boot order --- bin/propolis-server/src/lib/initializer.rs | 137 +++++++++++++++++- bin/propolis-server/src/lib/vm/ensure.rs | 1 + bin/propolis-standalone/src/config.rs | 4 +- bin/propolis-standalone/src/main.rs | 23 +-- .../src/instance_spec/v0/mod.rs | 11 ++ lib/propolis-client/src/instance_spec.rs | 29 ++++ openapi/propolis-server.json | 25 ++++ phd-tests/framework/src/test_vm/config.rs | 81 +++++++---- phd-tests/tests/src/boot_order.rs | 45 ++++++ phd-tests/tests/src/disk.rs | 1 + phd-tests/tests/src/lib.rs | 1 + 11 files changed, 316 insertions(+), 42 deletions(-) create mode 100644 phd-tests/tests/src/boot_order.rs diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 060934d74..9cbc38aaf 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -28,12 +28,18 @@ use propolis::hw::ibmpc; use propolis::hw::pci; use propolis::hw::ps2::ctrl::PS2Ctrl; use propolis::hw::qemu::pvpanic::QemuPvpanic; -use propolis::hw::qemu::{debug::QemuDebugPort, fwcfg, ramfb}; +use propolis::hw::qemu::{ + debug::QemuDebugPort, + fwcfg::{self, Entry}, + ramfb, +}; use propolis::hw::uart::LpcUart; use propolis::hw::{nvme, virtio}; use propolis::intr_pins; use propolis::vmm::{self, Builder, Machine}; -use propolis_api_types::instance_spec::{self, v0::InstanceSpecV0}; +use propolis_api_types::instance_spec::{ + self, v0::BootDeclaration, v0::InstanceSpecV0, +}; use propolis_api_types::InstanceProperties; use slog::info; @@ -117,6 +123,7 @@ pub struct MachineInitializer<'a> { pub(crate) properties: &'a InstanceProperties, pub(crate) toml_config: &'a crate::server::VmTomlConfig, pub(crate) producer_registry: Option, + pub(crate) boot_order: Option>, pub(crate) state: MachineInitializerState, } @@ -994,6 +1001,121 @@ impl<'a> MachineInitializer<'a> { smb_tables.commit() } + fn generate_bootorder(&self) -> Result, Error> { + eprintln!( + "generating bootorder with order: {:?}", + self.boot_order.as_ref() + ); + let Some(boot_names) = self.boot_order.as_ref() else { + return Ok(None); + }; + + let mut order = fwcfg::formats::BootOrder::new(); + + let parse_bdf = + |pci_path: &propolis_api_types::instance_spec::PciPath| { + let bdf: Result = + pci_path.to_owned().try_into().map_err(|e| { + Error::new( + ErrorKind::InvalidInput, + format!( + "Couldn't get PCI BDF for {}: {}", + pci_path, e + ), + ) + }); + + bdf + }; + + for boot_entry in boot_names.iter() { + if boot_entry.first_boot_only { + continue; + } + // names may refer to a storage device or network device. + // + // realistically we won't be booting from network devices, but leave that as a question + // for plumbing on the next layer up. + + let storage_backend = |spec| { + let spec: &instance_spec::v0::StorageDeviceV0 = spec; + match spec { + instance_spec::v0::StorageDeviceV0::VirtioDisk(disk) => { + &disk.backend_name + } + instance_spec::v0::StorageDeviceV0::NvmeDisk(disk) => { + &disk.backend_name + } + } + }; + + let network_backend = |spec| { + let spec: &instance_spec::v0::NetworkDeviceV0 = spec; + let instance_spec::v0::NetworkDeviceV0::VirtioNic(vnic_spec) = + spec; + + &vnic_spec.backend_name + }; + + if let Some(device_spec) = + self.spec.devices.storage_devices.iter().find_map( + |(_name, spec)| { + if storage_backend(spec) == &boot_entry.name { + Some(spec) + } else { + None + } + }, + ) + { + match device_spec { + instance_spec::v0::StorageDeviceV0::VirtioDisk(disk) => { + let bdf = parse_bdf(&disk.pci_path)?; + // TODO: check that bus is 0. only support boot devices + // directly attached to the root bus for now. + order.add_disk(bdf.location); + } + instance_spec::v0::StorageDeviceV0::NvmeDisk(disk) => { + let bdf = parse_bdf(&disk.pci_path)?; + // TODO: check that bus is 0. only support boot devices + // directly attached to the root bus for now. + // + // TODO: separately, propolis-standalone passes an eui64 + // of 0, so do that here too. is that.. ok? + order.add_nvme(bdf.location, 0); + } + }; + } else if let Some(vnic_spec) = + self.spec.devices.network_devices.iter().find_map( + |(name, spec)| { + if network_backend(spec) == &boot_entry.name { + Some(spec) + } else { + None + } + }, + ) + { + let instance_spec::v0::NetworkDeviceV0::VirtioNic(vnic_spec) = + vnic_spec; + + let bdf = parse_bdf(&vnic_spec.pci_path)?; + + order.add_pci(bdf.location, "ethernet"); + } else { + eprintln!( + "could not find {:?} in {:?} or {:?}", + boot_entry, + self.spec.devices.storage_devices, + self.spec.devices.network_devices + ); + panic!("TODO: return an error; the device name doesn't exist?"); + } + } + + Ok(Some(order.finish())) + } + /// Initialize qemu `fw_cfg` device, and populate it with data including CPU /// count, SMBIOS tables, and attached RAM-FB device. /// @@ -1010,6 +1132,13 @@ impl<'a> MachineInitializer<'a> { ) .unwrap(); + // TODO(ixi): extract `generate_smbios` and expect `smbios::TableBytes` as an argument to + // initialize_fwcfg. make `generate_smbios` accept rom size as an explicit parameter. this + // avoids the implicit panic if these are called out of order (and makes it harder to call + // out of order). + // + // one step towards sharing smbios init code between propolis-standalone and + // propolis-server. let smbios::TableBytes { entry_point, structure_table } = self.generate_smbios(); fwcfg @@ -1025,6 +1154,10 @@ impl<'a> MachineInitializer<'a> { ) .unwrap(); + if let Some(boot_order) = self.generate_bootorder()? { + fwcfg.insert_named("bootorder", boot_order).unwrap(); + } + let ramfb = ramfb::RamFb::create( self.log.new(slog::o!("component" => "ramfb")), ); diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 75232e9f7..881917638 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -177,6 +177,7 @@ impl<'a> VmEnsureNotStarted<'a> { properties, toml_config: &options.toml_config, producer_registry: options.oximeter_registry.clone(), + boot_order: v0_spec.devices.boot_order.clone(), state: MachineInitializerState::default(), }; diff --git a/bin/propolis-standalone/src/config.rs b/bin/propolis-standalone/src/config.rs index 3aff6c0da..69aec140e 100644 --- a/bin/propolis-standalone/src/config.rs +++ b/bin/propolis-standalone/src/config.rs @@ -136,7 +136,9 @@ pub fn block_backend( log: &slog::Logger, ) -> (Arc, String) { let backend_name = dev.options.get("block_dev").unwrap().as_str().unwrap(); - let be = config.block_devs.get(backend_name).unwrap(); + let Some(be) = config.block_devs.get(backend_name) else { + panic!("no configured block device named \"{}\"", backend_name); + }; let opts = block::BackendOpts { block_size: be.block_opts.block_size, read_only: be.block_opts.read_only, diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 1f210b661..7b3cf8496 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -959,8 +959,13 @@ fn generate_smbios(params: SmbiosParams) -> anyhow::Result { Ok(smb_tables.commit()) } -fn generate_bootorder(config: &config::Config) -> anyhow::Result { - let names = config.main.boot_order.as_ref().unwrap(); +fn generate_bootorder( + config: &config::Config, +) -> anyhow::Result> { + eprintln!("bootorder declared as {:?}", config.main.boot_order.as_ref()); + let Some(names) = config.main.boot_order.as_ref() else { + return Ok(None); + }; let mut order = fwcfg::formats::BootOrder::new(); for name in names.iter() { @@ -994,7 +999,7 @@ fn generate_bootorder(config: &config::Config) -> anyhow::Result { } } } - Ok(order.finish()) + Ok(Some(order.finish())) } fn setup_instance( @@ -1306,14 +1311,10 @@ fn setup_instance( // It is "safe" to generate bootorder (if requested) now, given that PCI // device configuration has been validated by preceding logic - if config.main.boot_order.is_some() { - fwcfg - .insert_named( - "bootorder", - generate_bootorder(&config) - .context("Failed to generate boot order")?, - ) - .unwrap(); + if let Some(boot_config) = + generate_bootorder(&config).context("Failed to generate boot order")? + { + fwcfg.insert_named("bootorder", boot_config).unwrap(); } fwcfg.attach(pio, &machine.acc_mem); diff --git a/crates/propolis-api-types/src/instance_spec/v0/mod.rs b/crates/propolis-api-types/src/instance_spec/v0/mod.rs index 344f83fcb..46fff1a1e 100644 --- a/crates/propolis-api-types/src/instance_spec/v0/mod.rs +++ b/crates/propolis-api-types/src/instance_spec/v0/mod.rs @@ -109,6 +109,11 @@ pub struct DeviceSpecV0 { #[serde(skip_serializing_if = "Option::is_none")] pub qemu_pvpanic: Option, + // same as above + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub boot_order: Option>, + #[cfg(feature = "falcon")] pub softnpu_pci_port: Option, #[cfg(feature = "falcon")] @@ -177,6 +182,12 @@ impl DeviceSpecV0 { } } +#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] +pub struct BootDeclaration { + pub name: SpecKey, + pub first_boot_only: bool, +} + #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] #[serde(deny_unknown_fields, tag = "type", content = "component")] pub enum StorageBackendV0 { diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index e99303cd3..c6347a89b 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -184,6 +184,35 @@ impl SpecBuilderV0 { } } + /// Sets a boot order. Names here refer to devices included in this spec. + /// + /// Permissible to not this if the implicit boot order is desired, but the implicit boot order + /// may be unstable across device addition and removal. + /// + /// XXX: talk about what happens if names are included that do not name real devices..? + /// + /// XXX: this should certainly return `&mut Self` - all the builders here should. check if any + /// of these are chained..? + pub fn set_boot_order( + &mut self, + boot_order: Vec, + ) -> Result<&Self, SpecBuilderError> { + let boot_declarations = boot_order + .into_iter() + .map(|name| crate::types::BootDeclaration { + name, + first_boot_only: false, + }) + .collect(); + eprintln!("setting boot order to {:?}", boot_declarations); + self.spec.devices.boot_order = Some(boot_declarations); + + // TODO: would be nice to warn if any of the devices named here are not devices in the spec + // though. + + Ok(self) + } + /// Yields the completed spec, consuming the builder. pub fn finish(self) -> InstanceSpecV0 { self.spec diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 9d4eee36e..bcee6ce33 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -662,6 +662,13 @@ "additionalProperties": { "$ref": "#/components/schemas/StorageDeviceV0" } + }, + "boot_order": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } } }, "required": [ @@ -1463,6 +1470,24 @@ "vcr_matches" ] }, + "BootDeclaration": { + "description": "A boot preference for some device.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + }, + "first_boot_only": { + "description": "Should we include this device in the boot order for reboots of a started instance?", + "type": "boolean" + } + }, + "required": [ + "name", + "first_boot_only" + ] + }, "SerialPort": { "description": "A serial port device.", "type": "object", diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 77673132d..90495928e 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -37,6 +37,7 @@ pub enum DiskBackend { #[derive(Clone, Debug)] struct DiskRequest<'a> { + name: &'a str, interface: DiskInterface, backend: DiskBackend, source: DiskSource<'a>, @@ -48,8 +49,8 @@ pub struct VmConfig<'dr> { cpus: u8, memory_mib: u64, bootrom_artifact: String, - boot_disk: DiskRequest<'dr>, - data_disks: Vec>, + boot_order: Option>, + disks: Vec>, devices: BTreeMap, } @@ -63,22 +64,25 @@ impl<'dr> VmConfig<'dr> { bootrom: &str, guest_artifact: &'dr str, ) -> Self { - let boot_disk = DiskRequest { - interface: DiskInterface::Nvme, - backend: DiskBackend::File, - source: DiskSource::Artifact(guest_artifact), - pci_device_num: 4, - }; - - Self { + let mut config = Self { vm_name: vm_name.to_owned(), cpus, memory_mib, bootrom_artifact: bootrom.to_owned(), - boot_disk, - data_disks: Vec::new(), + boot_order: None, + disks: Vec::new(), devices: BTreeMap::new(), - } + }; + + config.data_disk( + "boot-disk", + DiskSource::Artifact(guest_artifact), + DiskInterface::Nvme, + DiskBackend::File, + 4, + ); + + config } pub fn cpus(&mut self, cpus: u8) -> &mut Self { @@ -119,6 +123,11 @@ impl<'dr> VmConfig<'dr> { self } + pub fn boot_order(&mut self, disks: Vec<&'dr str>) -> &mut Self { + self.boot_order = Some(disks); + self + } + pub fn boot_disk( &mut self, artifact: &'dr str, @@ -126,7 +135,17 @@ impl<'dr> VmConfig<'dr> { backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - self.boot_disk = DiskRequest { + // If you're calling `boot_disk` directly, you're probably not specifying an explicit boot + // order. the _implicit_ boot order seems to be the order disks are created in, so preserve + // that implied behavior by having a boot disk inserted up front. + // + // XXX: i thought that the implicit order would be *pci device order*, not disk creation + // order? + // XXX: figure out what to do if there is both an explicit boot order and a call to + // `.boot_disk`. + + self.disks[0] = DiskRequest { + name: "boot-disk", interface, backend, source: DiskSource::Artifact(artifact), @@ -138,12 +157,14 @@ impl<'dr> VmConfig<'dr> { pub fn data_disk( &mut self, + name: &'dr str, source: DiskSource<'dr>, interface: DiskInterface, backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - self.data_disks.push(DiskRequest { + self.disks.push(DiskRequest { + name, interface, backend, source, @@ -172,7 +193,10 @@ impl<'dr> VmConfig<'dr> { }) .context("serializing Propolis server config")?; - let DiskSource::Artifact(boot_artifact) = self.boot_disk.source else { + let boot_disk = &self.disks[0]; + + // TODO: adapt this to check all listed boot devices + let DiskSource::Artifact(boot_artifact) = boot_disk.source else { unreachable!("boot disks always use artifacts as sources"); }; @@ -183,16 +207,12 @@ impl<'dr> VmConfig<'dr> { .context("getting guest OS kind for boot disk")?; let mut disk_handles = Vec::new(); - disk_handles.push( - make_disk("boot-disk".to_owned(), framework, &self.boot_disk) - .await - .context("creating boot disk")?, - ); - for (idx, data_disk) in self.data_disks.iter().enumerate() { + for disk in self.disks.iter() { disk_handles.push( - make_disk(format!("data-disk-{}", idx), framework, data_disk) + // format!("data-disk-{}", idx) + make_disk(disk.name.to_owned(), framework, disk) .await - .context("creating data disk")?, + .context("creating disk")?, ); } @@ -203,10 +223,7 @@ impl<'dr> VmConfig<'dr> { // elements for all of them. This assumes the disk handles were created // in the correct order: boot disk first, then in the data disks' // iteration order. - let all_disks = [&self.boot_disk] - .into_iter() - .chain(self.data_disks.iter()) - .zip(disk_handles.iter()); + let all_disks = self.disks.iter().zip(disk_handles.iter()); for (idx, (req, hdl)) in all_disks.enumerate() { let device_name = format!("disk-device{}", idx); let pci_path = PciPath::new(0, req.pci_device_num, 0).unwrap(); @@ -238,6 +255,14 @@ impl<'dr> VmConfig<'dr> { .add_serial_port(SerialPortNumber::Com1) .context("adding serial port to spec")?; + if let Some(boot_order) = self.boot_order.as_ref() { + spec_builder + .set_boot_order( + boot_order.iter().map(|x| x.to_string()).collect(), + ) + .context("adding boot order to spec")?; + } + let instance_spec = spec_builder.finish(); // Generate random identifiers for this instance's timeseries metadata. diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs new file mode 100644 index 000000000..e24019818 --- /dev/null +++ b/phd-tests/tests/src/boot_order.rs @@ -0,0 +1,45 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use phd_framework::{ + disk::DiskSource, + test_vm::{DiskBackend, DiskInterface}, +}; +use phd_testcase::*; +// use tracing::{info, warn}; + +#[phd_testcase] +async fn configurable_boot_order(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("configurable_boot_order"); + cfg.data_disk( + "alpine-3-20", + DiskSource::Artifact("alpine-3-20"), + DiskInterface::Virtio, + DiskBackend::File, + 16, + ); + + cfg.data_disk( + "alpine-3-16", + DiskSource::Artifact("alpine-3-16"), + DiskInterface::Virtio, + DiskBackend::File, + 24, + ); + + cfg.boot_order(vec!["alpine-3-20", "alpine-3-16"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let kernel = vm.run_shell_command("uname -r").await?; + if kernel.contains("6.6.41-0-virt") { + panic!("booted 6.6.41"); + } else if kernel.contains("5.15.41-0-virt") { + panic!("booted 5.15.41"); + } else { + eprintln!("dunno what we booted? {}", kernel); + } +} diff --git a/phd-tests/tests/src/disk.rs b/phd-tests/tests/src/disk.rs index 11a6e90c7..1db141c90 100644 --- a/phd-tests/tests/src/disk.rs +++ b/phd-tests/tests/src/disk.rs @@ -17,6 +17,7 @@ async fn in_memory_backend_smoke_test(ctx: &Framework) { let mut data = FatFilesystem::new(); data.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; cfg.data_disk( + "data-disk-0", DiskSource::FatFilesystem(data), DiskInterface::Virtio, DiskBackend::InMemory { readonly: true }, diff --git a/phd-tests/tests/src/lib.rs b/phd-tests/tests/src/lib.rs index a4968487a..37c9efcb3 100644 --- a/phd-tests/tests/src/lib.rs +++ b/phd-tests/tests/src/lib.rs @@ -4,6 +4,7 @@ pub use phd_testcase; +mod boot_order; mod crucible; mod disk; mod framework; From ca972784692a1df58bb66054941d7d2a7c9f09ae Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 6 Sep 2024 02:29:52 +0000 Subject: [PATCH 02/42] log plumbing --- bin/propolis-server/src/lib/initializer.rs | 20 +++++++++++--------- bin/propolis-standalone/src/main.rs | 6 ++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 1e7ebae70..cb6c48fd0 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -46,7 +46,7 @@ use propolis_api_types::instance_spec::{ self, v0::BootDeclaration, v0::InstanceSpecV0, }; use propolis_api_types::InstanceProperties; -use slog::info; +use slog::{info, warn}; /// Arbitrary ROM limit for now const MAX_ROM_SIZE: usize = 0x20_0000; @@ -1077,8 +1077,9 @@ impl<'a> MachineInitializer<'a> { } fn generate_bootorder(&self) -> Result, Error> { - eprintln!( - "generating bootorder with order: {:?}", + info!( + self.log, + "Generating bootorder with order: {:?}", self.boot_order.as_ref() ); let Some(boot_names) = self.boot_order.as_ref() else { @@ -1178,13 +1179,14 @@ impl<'a> MachineInitializer<'a> { order.add_pci(bdf.location, "ethernet"); } else { - eprintln!( - "could not find {:?} in {:?} or {:?}", - boot_entry, - self.spec.devices.storage_devices, - self.spec.devices.network_devices + let message = format!( + "Instance spec included boot entry which does not refer to an existing device: `{}`", + boot_entry.name.as_str(), ); - panic!("TODO: return an error; the device name doesn't exist?"); + // TODO(ixi): this is actually duplicative with the top-level `error!` that this + // unhandled `error` will bubble out to. Maybe just don't log here? + warn!(self.log, message); + return Err(Error::new(ErrorKind::Other, message)); } } diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 7b3cf8496..04add5ddb 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -961,12 +961,14 @@ fn generate_smbios(params: SmbiosParams) -> anyhow::Result { fn generate_bootorder( config: &config::Config, + log: &slog::Logger, ) -> anyhow::Result> { - eprintln!("bootorder declared as {:?}", config.main.boot_order.as_ref()); let Some(names) = config.main.boot_order.as_ref() else { return Ok(None); }; + slog::info!(log, "Bootorder declared as {:?}", config.main.boot_order.as_ref()); + let mut order = fwcfg::formats::BootOrder::new(); for name in names.iter() { let dev = config @@ -1312,7 +1314,7 @@ fn setup_instance( // It is "safe" to generate bootorder (if requested) now, given that PCI // device configuration has been validated by preceding logic if let Some(boot_config) = - generate_bootorder(&config).context("Failed to generate boot order")? + generate_bootorder(&config, log).context("Failed to generate boot order")? { fwcfg.insert_named("bootorder", boot_config).unwrap(); } From d5e43c4f707049a99b7009828001a7bfaa4d8b5a Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 7 Sep 2024 01:41:20 +0000 Subject: [PATCH 03/42] shelve the first_boot_only attribute for a moment this could have been made to work, but i'm walking this back as it would land fairly untested at first --- bin/propolis-server/src/lib/initializer.rs | 3 --- crates/propolis-api-types/src/instance_spec/v0.rs | 1 - lib/propolis-client/src/instance_spec.rs | 1 - openapi/propolis-server.json | 7 +------ 4 files changed, 1 insertion(+), 11 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index cb6c48fd0..2691611ce 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1105,9 +1105,6 @@ impl<'a> MachineInitializer<'a> { }; for boot_entry in boot_names.iter() { - if boot_entry.first_boot_only { - continue; - } // names may refer to a storage device or network device. // // realistically we won't be booting from network devices, but leave that as a question diff --git a/crates/propolis-api-types/src/instance_spec/v0.rs b/crates/propolis-api-types/src/instance_spec/v0.rs index 46fff1a1e..4d50cc7af 100644 --- a/crates/propolis-api-types/src/instance_spec/v0.rs +++ b/crates/propolis-api-types/src/instance_spec/v0.rs @@ -185,7 +185,6 @@ impl DeviceSpecV0 { #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] pub struct BootDeclaration { pub name: SpecKey, - pub first_boot_only: bool, } #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index c6347a89b..0cf557e41 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -201,7 +201,6 @@ impl SpecBuilderV0 { .into_iter() .map(|name| crate::types::BootDeclaration { name, - first_boot_only: false, }) .collect(); eprintln!("setting boot order to {:?}", boot_declarations); diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 92975d647..e2ec407b9 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1482,15 +1482,10 @@ "name": { "description": "The name of the device to attempt booting from.", "type": "string" - }, - "first_boot_only": { - "description": "Should we include this device in the boot order for reboots of a started instance?", - "type": "boolean" } }, "required": [ - "name", - "first_boot_only" + "name" ] }, "SerialPort": { From 3f921f0e32d0c58cdd00bc959c9cfafbb798b6e3 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 10 Sep 2024 23:03:26 +0000 Subject: [PATCH 04/42] rustfmt and log tweaks --- bin/propolis-server/src/lib/initializer.rs | 29 ++++++++++++---------- bin/propolis-standalone/src/main.rs | 10 +++++--- lib/propolis-client/src/instance_spec.rs | 4 +-- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 2691611ce..8ccab8960 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -46,7 +46,7 @@ use propolis_api_types::instance_spec::{ self, v0::BootDeclaration, v0::InstanceSpecV0, }; use propolis_api_types::InstanceProperties; -use slog::{info, warn}; +use slog::{error, info}; /// Arbitrary ROM limit for now const MAX_ROM_SIZE: usize = 0x20_0000; @@ -1130,16 +1130,19 @@ impl<'a> MachineInitializer<'a> { &vnic_spec.backend_name }; - if let Some(device_spec) = - self.spec.devices.storage_devices.iter().find_map( - |(_name, spec)| { - if storage_backend(spec) == &boot_entry.name { - Some(spec) - } else { - None - } - }, - ) + if let Some(device_spec) = self + .spec + .devices + .storage_devices + .iter() + .find_map(|(_name, spec)| { + eprintln!("checking name \"{}\", spec {:?}", _name, spec); + if storage_backend(spec) == &boot_entry.name { + Some(spec) + } else { + None + } + }) { match device_spec { instance_spec::v0::StorageDeviceV0::VirtioDisk(disk) => { @@ -1160,7 +1163,7 @@ impl<'a> MachineInitializer<'a> { }; } else if let Some(vnic_spec) = self.spec.devices.network_devices.iter().find_map( - |(name, spec)| { + |(_name, spec)| { if network_backend(spec) == &boot_entry.name { Some(spec) } else { @@ -1182,7 +1185,7 @@ impl<'a> MachineInitializer<'a> { ); // TODO(ixi): this is actually duplicative with the top-level `error!` that this // unhandled `error` will bubble out to. Maybe just don't log here? - warn!(self.log, message); + error!(self.log, "{}", message); return Err(Error::new(ErrorKind::Other, message)); } } diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 04add5ddb..a54bca1e2 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -967,7 +967,11 @@ fn generate_bootorder( return Ok(None); }; - slog::info!(log, "Bootorder declared as {:?}", config.main.boot_order.as_ref()); + slog::info!( + log, + "Bootorder declared as {:?}", + config.main.boot_order.as_ref() + ); let mut order = fwcfg::formats::BootOrder::new(); for name in names.iter() { @@ -1313,8 +1317,8 @@ fn setup_instance( // It is "safe" to generate bootorder (if requested) now, given that PCI // device configuration has been validated by preceding logic - if let Some(boot_config) = - generate_bootorder(&config, log).context("Failed to generate boot order")? + if let Some(boot_config) = generate_bootorder(&config, log) + .context("Failed to generate boot order")? { fwcfg.insert_named("bootorder", boot_config).unwrap(); } diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index 0cf557e41..e1ff0f06e 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -199,9 +199,7 @@ impl SpecBuilderV0 { ) -> Result<&Self, SpecBuilderError> { let boot_declarations = boot_order .into_iter() - .map(|name| crate::types::BootDeclaration { - name, - }) + .map(|name| crate::types::BootDeclaration { name }) .collect(); eprintln!("setting boot order to {:?}", boot_declarations); self.spec.devices.boot_order = Some(boot_declarations); From e0c7c71c5c14f5db64d1b9bfe19d8b585add7e57 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 10 Sep 2024 23:03:55 +0000 Subject: [PATCH 05/42] include actually-usable tests these still depend on having a guest artifact with an EFI partition to persist NvVars into, but with such a partition they actually show useful behavior! --- Cargo.lock | 2 + phd-tests/framework/src/guest_os/mod.rs | 4 + phd-tests/tests/Cargo.toml | 2 + phd-tests/tests/src/boot_order.rs | 648 +++++++++++++++++++++++- 4 files changed, 646 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed236c32c..99de39a92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3679,6 +3679,8 @@ dependencies = [ name = "phd-tests" version = "0.1.0" dependencies = [ + "anyhow", + "byteorder", "futures", "http 0.2.12", "phd-testcase", diff --git a/phd-tests/framework/src/guest_os/mod.rs b/phd-tests/framework/src/guest_os/mod.rs index b993bd175..3d1f094cc 100644 --- a/phd-tests/framework/src/guest_os/mod.rs +++ b/phd-tests/framework/src/guest_os/mod.rs @@ -13,6 +13,7 @@ mod alpine; mod debian11_nocloud; mod shell_commands; mod ubuntu22_04; +mod ubuntu24_04; mod windows; mod windows_server_2016; mod windows_server_2019; @@ -88,6 +89,7 @@ pub enum GuestOsKind { Alpine, Debian11NoCloud, Ubuntu2204, + Ubuntu2404, WindowsServer2016, WindowsServer2019, WindowsServer2022, @@ -101,6 +103,7 @@ impl FromStr for GuestOsKind { "alpine" => Ok(Self::Alpine), "debian11nocloud" => Ok(Self::Debian11NoCloud), "ubuntu2204" => Ok(Self::Ubuntu2204), + "ubuntu2404" => Ok(Self::Ubuntu2404), "windowsserver2016" => Ok(Self::WindowsServer2016), "windowsserver2019" => Ok(Self::WindowsServer2019), "windowsserver2022" => Ok(Self::WindowsServer2022), @@ -119,6 +122,7 @@ pub(super) fn get_guest_os_adapter(kind: GuestOsKind) -> Box { Box::new(debian11_nocloud::Debian11NoCloud) } GuestOsKind::Ubuntu2204 => Box::new(ubuntu22_04::Ubuntu2204), + GuestOsKind::Ubuntu2404 => Box::new(ubuntu24_04::Ubuntu2404), GuestOsKind::WindowsServer2016 => { Box::new(windows_server_2016::WindowsServer2016) } diff --git a/phd-tests/tests/Cargo.toml b/phd-tests/tests/Cargo.toml index 499878762..ab28ba371 100644 --- a/phd-tests/tests/Cargo.toml +++ b/phd-tests/tests/Cargo.toml @@ -8,6 +8,8 @@ test = false doctest = false [dependencies] +anyhow.workspace = true +byteorder.workspace = true futures.workspace = true http.workspace = true propolis-client.workspace = true diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index e24019818..1398c0bef 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -2,12 +2,386 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use anyhow::{bail, Error}; +use byteorder::{LittleEndian, ReadBytesExt}; use phd_framework::{ - disk::DiskSource, + disk::{fat::FatFilesystem, DiskSource}, test_vm::{DiskBackend, DiskInterface}, }; use phd_testcase::*; -// use tracing::{info, warn}; +use std::io::{Cursor, Read}; +use tracing::{info, warn}; + +// This test checks that with a specified boot order, the guest boots whichever disk we wanted to +// come first. This is simple enough, until you want to know "what you booted from".. +// +// For live CDs, such as Alpine's, the system boots into a tmpfs loaded from a boot disk, but +// there's no clear line to what disk that live image *came from*. If you had two Alpine 3.20.3 +// images attached to one VM, you'd ceretainly boot into Alpine 3.20.3, but I don't see a way to +// tell *which disk* that Alpine would be sourced from, from Alpine alone. +// +// So instead, check EFI variables. To do this, then, we have to.. parse EFI variables. That is +// what this test does below, but it's probably not fully robust to what we might do with PCI +// devices in the future. +// +// A more "future-proof" setup might be to just boot an OS, see that we ended up in the OS we +// expected, and check some attribute about it like that the kernel version is what we expected the +// booted OS to be. That's still a good fallback if we discover that parsing EFI variables is +// difficult to stick with for any reason. It has a downside though: we'd have to keep a specific +// image around with a specific kernel version as either the "we expect to boot into this" image +// or the "we expected to boot into not this" cases. + +// That all said, while this test checks for specific EDK2/OVMF magic values, those magic values +// should be explained. The rest of this comment does just that. +// +// First, soem GUIDs. These GUIDs come from EDK2, and OVMF reuses them. Notably these are the raw +// bytes of the GUID: textual values will have slightly different ordering of bytes. +// +// Some source references, as you won't find these GUIDs in a UEFI or related spec document.. The +// firmware volume is identified by what seems to be the DXE firmware volume: +// https://github.com/tianocore/edk2/blob/712797c/OvmfPkg/OvmfPkgIa32.fdf#L181 +// introduced in +// https://github.com/tianocore/edk2/commit/16f26de663967b5a64140b6abba2c145ea50194c, note this +// is the DXEFV entry. +// +// The *files* we'll care about in this test are identified by other GUIDs in the above *volume*. +// +// EFI Internal Shell: https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 +// UiApp: +// https://github.com/tianocore/edk2/blob/a445e1a/MdeModulePkg/Application/UiApp/UiApp.inf#L13 +const EDK2_FIRMWARE_VOL_GUID: &'static [u8; 16] = &[ + 0xc9, 0xbd, 0xb8, 0x7c, 0xeb, 0xf8, 0x34, 0x4f, 0xaa, 0xea, 0x3e, 0xe4, + 0xaf, 0x65, 0x16, 0xa1, +]; +const EDK2_UI_APP_GUID: &'static [u8; 16] = &[ + 0x21, 0xaa, 0x2c, 0x46, 0x14, 0x76, 0x03, 0x45, 0x83, 0x6e, 0x8a, 0xb6, + 0xf4, 0x66, 0x23, 0x31, +]; +const EDK2_EFI_SHELL_GUID: &'static [u8; 16] = &[ + 0x83, 0xa5, 0x04, 0x7c, 0x3e, 0x9e, 0x1c, 0x4f, 0xad, 0x65, 0xe0, 0x52, + 0x68, 0xd0, 0xb4, 0xd1, +]; + +// The variable namespace `8be4df61-93ca-11d2-aa0d-00e098032b8c` comes from UEFI, as do the +// variable names here. The presentation as `{varname}-{namespace}`, and at a path like +// `/sys/firmware/efi/efivars/`, are both Linux `efivars`-isms. +// +// These tests likely will not pass when run with other guest OSes. +const BOOT_CURRENT_VAR: &'static str = + "BootCurrent-8be4df61-93ca-11d2-aa0d-00e098032b8c"; +const BOOT_ORDER_VAR: &'static str = + "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c"; + +fn bootvar(num: u16) -> String { + format!("Boot{:04X}-8be4df61-93ca-11d2-aa0d-00e098032b8c", num) +} + +fn efipath(varname: &str) -> String { + format!("/sys/firmware/efi/efivars/{}", varname) +} + +/// A (very limited) parse of an `EFI_LOAD_OPTION` descriptor. +#[derive(Debug)] +struct EfiLoadOption { + description: String, + path: EfiLoadPath, +} + +#[derive(Debug)] +enum EfiLoadPath { + Device { acpi_root: DevicePath, pci_device: DevicePath }, + FirmwareFile { volume: DevicePath, file: DevicePath }, +} + +impl EfiLoadPath { + fn matches_fw_file(&self, fw_vol: &[u8; 16], fw_file: &[u8; 16]) -> bool { + if let EfiLoadPath::FirmwareFile { + volume: DevicePath::FirmwareVolume { guid: vol_gid }, + file: DevicePath::FirmwareFile { guid: vol_file }, + } = self + { + vol_gid == fw_vol && vol_file == fw_file + } else { + false + } + } + + fn matches_pci_device_function( + &self, + pci_device: u8, + pci_function: u8, + ) -> bool { + if let EfiLoadPath::Device { + acpi_root: DevicePath::Acpi { .. }, + pci_device: DevicePath::Pci { device, function }, + } = self + { + pci_device == *device && pci_function == *function + } else { + false + } + } +} + +#[derive(Debug, Clone, Copy)] +enum DevicePath { + Acpi { hid: u32, uid: u32 }, + Pci { device: u8, function: u8 }, + + // These two are described in sections 8.2 and 8.3 of the UEFI PI spec, respectively. + // Version 1.6 can be found at https://uefi.org/sites/default/files/resources/PI_Spec_1.6.pdf + FirmwareVolume { guid: [u8; 16] }, + FirmwareFile { guid: [u8; 16] }, +} + +impl DevicePath { + fn parse_from(bytes: &mut Cursor<&[u8]>) -> Result { + let ty = bytes.read_u8()?; + let subtype = bytes.read_u8()?; + + match (ty, subtype) { + (2, 1) => { + // ACPI Device Path + let size = bytes.read_u16::()?; + if size != 0xc { + bail!("ACPI Device Path size is wrong (was {:#04x}, not 0x000c)", size); + } + let hid = bytes.read_u32::().unwrap(); + let uid = bytes.read_u32::().unwrap(); + Ok(DevicePath::Acpi { hid, uid }) + } + (1, 1) => { + // PCI device path + let size = bytes.read_u16::()?; + if size != 0x6 { + bail!("PCI Device Path size is wrong (was {:#04x}, not 0x0006)", size); + } + let function = bytes.read_u8().unwrap(); + let device = bytes.read_u8().unwrap(); + Ok(DevicePath::Pci { device, function }) + } + (4, 6) => { + // "PIWG Firmware File" aka "Firmware File" in UEFI PI spec + let size = bytes.read_u16::()?; + if size != 0x14 { + bail!( + "Firmware File size is wrong (was {:#04x}, not 0x0014)", + size + ); + } + let mut guid = [0u8; 16]; + bytes.read_exact(&mut guid)?; + Ok(DevicePath::FirmwareFile { guid }) + } + (4, 7) => { + // "PIWG Firmware Volume" aka "Firmware Volume" in UEFI PI spec + let size = bytes.read_u16::()?; + if size != 0x14 { + bail!("Firmware Volume size is wrong (was {:#04x}, not 0x0014)", size); + } + let mut guid = [0u8; 16]; + bytes.read_exact(&mut guid)?; + Ok(DevicePath::FirmwareVolume { guid }) + } + (ty, subtype) => { + bail!( + "Device path type/subtype unsupported: ({:#02x}/{:#02x})", + ty, + subtype + ); + } + } + } +} + +impl EfiLoadOption { + // parsing here brought to you by rereading + // * https://uefi.org/specs/UEFI/2.10/10_Protocols_Device_Path_Protocol.html + // * https://uefi.org/specs/UEFI/2.10/03_Boot_Manager.html + fn parse_from(bytes: &mut Cursor<&[u8]>) -> Result { + let _attributes = bytes.read_u32::()?; + let file_path_list_length = bytes.read_u16::()?; + + // The `Description` field is a null-terminated string of char16. + let mut description_chars: Vec = Vec::new(); + + loop { + let c = bytes.read_u16::()?; + description_chars.push(c); + if c == 0 { + break; + } + } + + let description = String::from_utf16(&description_chars) + .expect("description is valid utf16"); + + let mut device_path_cursor = Cursor::new( + &bytes.get_ref()[bytes.position() as usize..] + [..file_path_list_length as usize], + ); + + let path_entry = DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + let load_path = match path_entry { + acpi_root @ DevicePath::Acpi { .. } => { + let pci_device = + DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + if !matches!(pci_device, DevicePath::Pci { .. }) { + bail!("expected ACPI Device Path entry to be followed by a PCI Device Path, but was {:?}", pci_device); + } + + EfiLoadPath::Device { acpi_root, pci_device } + } + volume @ DevicePath::FirmwareVolume { .. } => { + let file = DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + if !matches!(file, DevicePath::FirmwareFile { .. }) { + bail!("expected Firmware Volume entry to be followed by a Firmware File, but was {:?}", file); + } + + EfiLoadPath::FirmwareFile { volume, file } + } + other => { + bail!("unexpected root EFI Load Option path item: {:?}", other); + } + }; + + // Not strictly necessary, but advance `bytes` by the number of bytes we read from + // `device_path_cursor`. To callers, this keeps it as if we had just been reading `bytes` + // all along. + bytes.set_position(bytes.position() + device_path_cursor.position()); + + Ok(EfiLoadOption { description, path: load_path }) + } + + fn pci_device_function(&self) -> (u8, u8) { + let EfiLoadPath::Device { + pci_device: DevicePath::Pci { device, function }, + .. + } = self.path + else { + panic!( + "expected load path to be an ACPI/PCI pair, but was {:?}", + self.path + ); + }; + (device, function) + } +} + +fn unhex(s: &str) -> Vec { + let s = s.replace("\n", ""); + let mut res = Vec::new(); + for chunk in s.as_bytes().chunks(2) { + assert_eq!(chunk.len(), 2); + + let s = std::str::from_utf8(chunk).expect("valid string"); + + let b = u8::from_str_radix(s, 16).expect("can parse"); + + res.push(b); + } + return res; +} + +async fn run_long_command( + vm: &phd_framework::TestVm, + cmd: &str, +) -> Result { + // Ok, this is a bit whacky: something about the line wrapping for long commands causes + // `run_shell_command` to hang instead of ever actually seeing a response prompt. + // + // I haven't gone and debugged that; instead, chunk the input command up into segments short + // enough to not wrap when input, put them all in a file, then run the file. + vm.run_shell_command("rm cmd").await?; + let mut offset = 0; + // Escape any internal `\`. This isn't comprehensive escaping (doesn't handle \n, for + // example).. + let cmd = cmd.replace("\\", "\\\\"); + while offset < cmd.len() { + let lim = std::cmp::min(cmd.len() - offset, 50); + let chunk = &cmd[offset..][..lim]; + offset += lim; + + // Catch this before it causes weird issues in half-executed commands. + // + // Could escape these here, but right now that's not really necessary. + assert!(!chunk.contains("\n")); + + vm.run_shell_command(&format!("echo -n \'{}\' >>cmd", chunk)).await?; + } + vm.run_shell_command(". cmd").await +} + +/// Read the EFI variable `varname` from inside the VM, and return the data therein as a byte +/// array. +async fn read_efivar( + vm: &phd_framework::TestVm, + varname: &str, +) -> Result, Error> { + // Linux's `efivarfs` prepends 4 bytes of attributes to EFI variables. + let cmd = format!( + "dd status=none if={} bs=1 skip=4 | xxd -p -", + efipath(varname) + ); + + let hex = run_long_command(vm, &cmd).await?; + + Ok(unhex(&hex)) +} + +/// Write the provided bytes to the EFI variable `varname`. +/// +/// For Linux guests: variables automatically have their prior attributes prepended. Provide only +/// the variable's data. +async fn write_efivar( + vm: &phd_framework::TestVm, + varname: &str, + data: &[u8], +) -> Result<(), Error> { + let attr_cmd = format!( + "dd status=none if={} bs=1 count=4 | xxd -p -", + efipath(varname) + ); + + let attr_read_bytes = run_long_command(vm, &attr_cmd).await?; + let attrs = if attr_read_bytes.ends_with(": No such file or directory") { + // Default attributes if the variable does not exist yet. We expect it to be non-volatile + // because we are writing it, we expect it to be available to boot services (not strictly + // required, but for boot configuration we need it), and we expect it to be available at + // runtime (e.g. where we are reading and writing it). + // + // so: + // NON_VOLATILE | BOOTSERVICE_ACCESS | RUNTIME_ACCESS + const FRESH_ATTRS: u32 = 0x00_00_00_07; + FRESH_ATTRS.to_le_bytes().to_vec() + } else { + unhex(&attr_read_bytes) + }; + + let mut new_value = attrs; + new_value.extend_from_slice(data); + + // The command to write this data back out will be, roughtly: + // ``` + // printf "\xAA\xAA\xAA\xAA\xDD\xDD\xDD\xDD" > /sys/firmware/efi/efivars/... + // ``` + // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided data. + let escaped: String = + new_value.into_iter().map(|b| format!("\\x{:02x}", b)).collect(); + + let cmd = format!("printf \"{}\" > {}", escaped, efipath(varname)); + + let res = run_long_command(vm, &cmd).await?; + // If something went sideways and the write failed with something like `invalid argument`... + if res.len() != 0 { + bail!("writing efi produced unexpected output: {}", res); + } + + Ok(()) +} #[phd_testcase] async fn configurable_boot_order(ctx: &Framework) { @@ -28,18 +402,272 @@ async fn configurable_boot_order(ctx: &Framework) { 24, ); - cfg.boot_order(vec!["alpine-3-20", "alpine-3-16"]); + cfg.boot_order(vec!["alpine-3-16", "alpine-3-20"]); let mut vm = ctx.spawn_vm(&cfg, None).await?; vm.launch().await?; vm.wait_to_boot().await?; - let kernel = vm.run_shell_command("uname -r").await?; - if kernel.contains("6.6.41-0-virt") { - panic!("booted 6.6.41"); - } else if kernel.contains("5.15.41-0-virt") { - panic!("booted 5.15.41"); - } else { - eprintln!("dunno what we booted? {}", kernel); + let boot_num_bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + + let boot_num: u16 = u16::from_le_bytes(boot_num_bytes.try_into().unwrap()); + + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let mut cursor = Cursor::new(boot_option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + // If we were going to test the PCI bus number too, we'd check the AHCI Device Path entry that + // precedes these PCI values. But we only use PCI bus 0 today, and the mapping from an AHCI + // Device Path to a PCI root is not immediately obvious? + assert!(load_option.path.matches_pci_device_function(16, 0)); +} + +// This is very similar to the `in_memory_backend_smoke_test` test, but specifically asserts that +// the unbootable disk is first in the boot order; the system booting means that boot order is +// respected and a non-bootable disk does not wedge startup. +#[phd_testcase] +async fn unbootable_disk_skipped(ctx: &Framework) { + const HELLO_MSG: &str = "hello oxide!"; + + let mut cfg = ctx.vm_config_builder("unbootable_disk_skipped"); + let mut unbootable = FatFilesystem::new(); + unbootable.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; + cfg.data_disk( + "unbootable", + DiskSource::FatFilesystem(unbootable), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 16, + ); + + // `boot-disk` is the implicitly-created boot disk made from the default guest OS artifact. + // + // explicitly boot from it later, so OVMF has to try and fail to boot `unbootable`. + cfg.boot_order(vec!["unbootable", "boot-disk"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_num_bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + + let boot_num: u16 = u16::from_le_bytes(boot_num_bytes.try_into().unwrap()); + + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let mut cursor = Cursor::new(boot_option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + // Device 4 is the implicitly-used `boot-disk` PCI device number. This is not 16, for example, + // as we expect to not boot `unbootable`. + assert_eq!(load_option.pci_device_function(), (4, 0)); + + let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + // Interestingly, when we specify a boot order via fwcfg, OVMF includes two additional entries: + // * "UiApp", which I can't find much about + // * "EFI Internal Shell", the EFI shell the system drops into if no disks are bootable + // + // Exactly where these end up in the boot order is not entirely important; we really just need + // to make sure that the boot order we specified comes first (and before "EFI Internal Shell") + #[derive(Debug, PartialEq, Eq)] + enum TestState { + SeekingUnbootable, + FoundUnbootable, + AfterBootOrder, } + + let mut state = TestState::SeekingUnbootable; + + for item in boot_order_bytes.chunks(2) { + let option_num: u16 = u16::from_le_bytes(item.try_into().unwrap()); + + let option_bytes = read_efivar(&vm, &bootvar(option_num)).await?; + + let mut cursor = Cursor::new(option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + match state { + TestState::SeekingUnbootable => { + if load_option.path.matches_pci_device_function(16, 0) { + state = TestState::FoundUnbootable; + continue; + } else if load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID) + { + // `UiApp`. Ignore it and continue. + continue; + } else { + bail!( + "Did not expect to find {:?} yet (test state = {:?})", + load_option, + state + ); + } + } + TestState::FoundUnbootable => { + if load_option.path.matches_pci_device_function(4, 0) { + state = TestState::AfterBootOrder; + continue; + } else { + bail!( + "Did not expect to find {:?} (test state = {:?})", + load_option, + state + ); + } + } + TestState::AfterBootOrder => { + let is_ui_app = load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID); + let is_efi_shell = load_option.path.matches_fw_file( + EDK2_FIRMWARE_VOL_GUID, + EDK2_EFI_SHELL_GUID, + ); + if !is_ui_app && !is_efi_shell { + bail!( + "Did not expect to find {:?} (test state = {:?})", + load_option, + state + ); + } + } + } + } + + assert_eq!(state, TestState::AfterBootOrder); } + +// Start with the boot order being `["boot-disk", "unbootable"]`, then change it so that next boot +// we'll boot from `unbootable` first. Then reboot and verify that the boot order is still +// "boot-disk" first. +// +// TODO(ixi): This would be a bit nicer if there was a secondary image to confirm we've booted into +// instead... +#[phd_testcase] +async fn guest_can_adjust_boot_order(ctx: &Framework) { + const HELLO_MSG: &str = "hello oxide!"; + + let mut cfg = ctx.vm_config_builder("guest_can_adjust_boot_order"); + let mut unbootable = FatFilesystem::new(); + unbootable.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; + cfg.boot_disk("ubuntu-noble", DiskInterface::Virtio, DiskBackend::File, 4); + + cfg.data_disk( + "unbootable", + DiskSource::FatFilesystem(unbootable), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 16, + ); + + // `boot-disk` is the implicitly-created boot disk made from the default guest OS artifact. + // + // explicitly boot from it later, so OVMF has to try and fail to boot `unbootable`. + cfg.boot_order(vec!["boot-disk"]); //, "unbootable"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + // If the guest doesn't have an EFI partition then there's no way for boot order preferences to + // be persisted. + let mountline = vm.run_shell_command("mount | grep /boot/efi").await?; + + if !mountline.contains(" on /boot/efi type vfat") { + warn!("guest doesn't have an EFI partition, cannot manage boot order"); + } + + // Try adding a few new boot options, then add them to the boot order, reboot, and make sure + // they're all as we set them. + if !run_long_command(&vm, &format!("ls {}", efipath(&bootvar(0xffff)))) + .await? + .is_empty() + { + warn!("guest environment already has a BootFFFF entry; is this not a fresh image?"); + } + + let boot_num: u16 = { + let bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + u16::from_le_bytes(bytes.try_into().unwrap()) + }; + + // The entry we booted from is clearly valid, so we should be able to insert a few duplicate + // entries. We won't boot into them, but if something bogus happens and we did boot one of + // these, at least it'll work and we can detect the misbehavior. + // + // But here's a weird one: if we just append these to the end, on reboot they'll be moved + // somewhat up the boot order. This occurrs both if setting variables through `efibootmgr` or + // by writing to /sys/firmware/efi/efivars/BootOrder-* directly. As an example, say we had a + // boot order of "0004,0001,0003,0000" where boot options were as follows: + // * 0000: UiApp + // * 0001: PCI device 4, function 0 + // * 0003: EFI shell (Firmware volume+file) + // * 0004: Ubuntu (HD partition 15, GPT formatted) + // + // If we duplicate entry 4 to new options FFF0 and FFFF, reset the boot order to + // "0004,0001,0003,0000,FFF0,FFFF", then reboot the VM, the boot order when it comes back + // up will be "0004,0001,FFF0,FFFF,0003,0000". + // + // This almost makes sense, but with other devices in the mix I've seen reorderings like + // `0004,0001,,0003,0000,FFF0,FFFF` turning into + // `0004,0001,FFF0,FFFF,,0003,0000`. This is particularly strange in that the new + // options were reordered around some other PCI device. It's not the boot order we set! + // + // So, to at least confirm we *can* modify the boot order in a stable way, make a somewhat less + // ambitious change: insert the duplicate boot options in the order directly after the option + // they are duplicates of. This seems to not get reordered. + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + let mut new_boot_order = Vec::new(); + new_boot_order.extend_from_slice(&boot_order_bytes); + + let mut new_boot_order = boot_order_bytes.clone(); + let booted_idx = new_boot_order + .chunks(2) + .enumerate() + .find(|(_i, chunk)| *chunk == boot_num.to_le_bytes()) + .map(|(i, _chunk)| i) + .expect("booted entry exists"); + let suffix = new_boot_order.split_off((booted_idx + 1) * 2); + new_boot_order.extend_from_slice(&[0xf0, 0xff]); + new_boot_order.extend_from_slice(&[0xff, 0xff]); + new_boot_order.extend_from_slice(&suffix); + + write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; + assert_eq!(boot_option_bytes, read_efivar(&vm, &bootvar(0xfff0)).await?); + write_efivar(&vm, &bootvar(0xffff), &boot_option_bytes).await?; + assert_eq!(boot_option_bytes, read_efivar(&vm, &bootvar(0xffff)).await?); + + write_efivar(&vm, BOOT_ORDER_VAR, &new_boot_order).await?; + let written_boot_order = read_efivar(&vm, BOOT_ORDER_VAR).await?; + assert_eq!(new_boot_order, written_boot_order); + + // Now, reboot and check that the settings stuck. + vm.run_shell_command("reboot").await?; + vm.wait_to_boot().await?; + + let boot_order_after_reboot = read_efivar(&vm, BOOT_ORDER_VAR).await?; + assert_eq!(new_boot_order, boot_order_after_reboot); + + let boot_num_after_reboot: u16 = { + let bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + u16::from_le_bytes(bytes.try_into().unwrap()) + }; + assert_eq!(boot_num, boot_num_after_reboot); + + let boot_option_bytes_after_reboot = + read_efivar(&vm, &bootvar(boot_num)).await?; + assert_eq!(boot_option_bytes, boot_option_bytes_after_reboot); +} + +// and then one more test about what happens if configure the boot order in the guest, stop the VM, +// change the configured boot order, and start the VM again... From 54bda84665eadf7dd023c524b3cb4010eaba3498 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 10 Sep 2024 23:05:50 +0000 Subject: [PATCH 06/42] actually include guest OS definitions for 24.04 (shouldn't land) functionally the guest adaptations for 22.04 and 24.04 are the same. these "24.04" definitions are actually in support of my weird 24.04 base image that has a user "root" instead of ubuntu-with-a-password --- .../framework/src/guest_os/ubuntu24_04.rs | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 phd-tests/framework/src/guest_os/ubuntu24_04.rs diff --git a/phd-tests/framework/src/guest_os/ubuntu24_04.rs b/phd-tests/framework/src/guest_os/ubuntu24_04.rs new file mode 100644 index 000000000..40d182b4c --- /dev/null +++ b/phd-tests/framework/src/guest_os/ubuntu24_04.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Guest OS adaptations for Ubuntu 24.04 images. These must be prepped with +//! a cloud-init disk that is configured with the appropriate user and password. + +use super::{CommandSequence, CommandSequenceEntry, GuestOs}; + +pub(super) struct Ubuntu2404; + +impl GuestOs for Ubuntu2404 { + fn get_login_sequence(&self) -> CommandSequence { + CommandSequence(vec![ + CommandSequenceEntry::wait_for("ubuntu login: "), + CommandSequenceEntry::write_str("root"), + CommandSequenceEntry::wait_for(self.get_shell_prompt()), + ]) + } + + fn get_shell_prompt(&self) -> &'static str { + "root@ubuntu:~#" + } + + fn read_only_fs(&self) -> bool { + false + } +} From cc485b04d70bc0e7f44152ca8d100fd462f0cb2a Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 12 Sep 2024 07:13:16 +0000 Subject: [PATCH 07/42] move all the EFI variable stuff to a module this makes the tests kind of legible.. --- phd-tests/tests/src/boot_order.rs | 354 +------------- phd-tests/tests/src/boot_order/efi_utils.rs | 492 ++++++++++++++++++++ 2 files changed, 502 insertions(+), 344 deletions(-) create mode 100644 phd-tests/tests/src/boot_order/efi_utils.rs diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index 1398c0bef..a61177b82 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -3,290 +3,24 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use anyhow::{bail, Error}; -use byteorder::{LittleEndian, ReadBytesExt}; use phd_framework::{ disk::{fat::FatFilesystem, DiskSource}, test_vm::{DiskBackend, DiskInterface}, }; use phd_testcase::*; -use std::io::{Cursor, Read}; -use tracing::{info, warn}; +use std::io::Cursor; +use tracing::warn; -// This test checks that with a specified boot order, the guest boots whichever disk we wanted to -// come first. This is simple enough, until you want to know "what you booted from".. -// -// For live CDs, such as Alpine's, the system boots into a tmpfs loaded from a boot disk, but -// there's no clear line to what disk that live image *came from*. If you had two Alpine 3.20.3 -// images attached to one VM, you'd ceretainly boot into Alpine 3.20.3, but I don't see a way to -// tell *which disk* that Alpine would be sourced from, from Alpine alone. -// -// So instead, check EFI variables. To do this, then, we have to.. parse EFI variables. That is -// what this test does below, but it's probably not fully robust to what we might do with PCI -// devices in the future. -// -// A more "future-proof" setup might be to just boot an OS, see that we ended up in the OS we -// expected, and check some attribute about it like that the kernel version is what we expected the -// booted OS to be. That's still a good fallback if we discover that parsing EFI variables is -// difficult to stick with for any reason. It has a downside though: we'd have to keep a specific -// image around with a specific kernel version as either the "we expect to boot into this" image -// or the "we expected to boot into not this" cases. - -// That all said, while this test checks for specific EDK2/OVMF magic values, those magic values -// should be explained. The rest of this comment does just that. -// -// First, soem GUIDs. These GUIDs come from EDK2, and OVMF reuses them. Notably these are the raw -// bytes of the GUID: textual values will have slightly different ordering of bytes. -// -// Some source references, as you won't find these GUIDs in a UEFI or related spec document.. The -// firmware volume is identified by what seems to be the DXE firmware volume: -// https://github.com/tianocore/edk2/blob/712797c/OvmfPkg/OvmfPkgIa32.fdf#L181 -// introduced in -// https://github.com/tianocore/edk2/commit/16f26de663967b5a64140b6abba2c145ea50194c, note this -// is the DXEFV entry. -// -// The *files* we'll care about in this test are identified by other GUIDs in the above *volume*. -// -// EFI Internal Shell: https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 -// UiApp: -// https://github.com/tianocore/edk2/blob/a445e1a/MdeModulePkg/Application/UiApp/UiApp.inf#L13 -const EDK2_FIRMWARE_VOL_GUID: &'static [u8; 16] = &[ - 0xc9, 0xbd, 0xb8, 0x7c, 0xeb, 0xf8, 0x34, 0x4f, 0xaa, 0xea, 0x3e, 0xe4, - 0xaf, 0x65, 0x16, 0xa1, -]; -const EDK2_UI_APP_GUID: &'static [u8; 16] = &[ - 0x21, 0xaa, 0x2c, 0x46, 0x14, 0x76, 0x03, 0x45, 0x83, 0x6e, 0x8a, 0xb6, - 0xf4, 0x66, 0x23, 0x31, -]; -const EDK2_EFI_SHELL_GUID: &'static [u8; 16] = &[ - 0x83, 0xa5, 0x04, 0x7c, 0x3e, 0x9e, 0x1c, 0x4f, 0xad, 0x65, 0xe0, 0x52, - 0x68, 0xd0, 0xb4, 0xd1, -]; - -// The variable namespace `8be4df61-93ca-11d2-aa0d-00e098032b8c` comes from UEFI, as do the -// variable names here. The presentation as `{varname}-{namespace}`, and at a path like -// `/sys/firmware/efi/efivars/`, are both Linux `efivars`-isms. -// -// These tests likely will not pass when run with other guest OSes. -const BOOT_CURRENT_VAR: &'static str = - "BootCurrent-8be4df61-93ca-11d2-aa0d-00e098032b8c"; -const BOOT_ORDER_VAR: &'static str = - "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c"; - -fn bootvar(num: u16) -> String { - format!("Boot{:04X}-8be4df61-93ca-11d2-aa0d-00e098032b8c", num) -} - -fn efipath(varname: &str) -> String { - format!("/sys/firmware/efi/efivars/{}", varname) -} - -/// A (very limited) parse of an `EFI_LOAD_OPTION` descriptor. -#[derive(Debug)] -struct EfiLoadOption { - description: String, - path: EfiLoadPath, -} - -#[derive(Debug)] -enum EfiLoadPath { - Device { acpi_root: DevicePath, pci_device: DevicePath }, - FirmwareFile { volume: DevicePath, file: DevicePath }, -} - -impl EfiLoadPath { - fn matches_fw_file(&self, fw_vol: &[u8; 16], fw_file: &[u8; 16]) -> bool { - if let EfiLoadPath::FirmwareFile { - volume: DevicePath::FirmwareVolume { guid: vol_gid }, - file: DevicePath::FirmwareFile { guid: vol_file }, - } = self - { - vol_gid == fw_vol && vol_file == fw_file - } else { - false - } - } - - fn matches_pci_device_function( - &self, - pci_device: u8, - pci_function: u8, - ) -> bool { - if let EfiLoadPath::Device { - acpi_root: DevicePath::Acpi { .. }, - pci_device: DevicePath::Pci { device, function }, - } = self - { - pci_device == *device && pci_function == *function - } else { - false - } - } -} - -#[derive(Debug, Clone, Copy)] -enum DevicePath { - Acpi { hid: u32, uid: u32 }, - Pci { device: u8, function: u8 }, - - // These two are described in sections 8.2 and 8.3 of the UEFI PI spec, respectively. - // Version 1.6 can be found at https://uefi.org/sites/default/files/resources/PI_Spec_1.6.pdf - FirmwareVolume { guid: [u8; 16] }, - FirmwareFile { guid: [u8; 16] }, -} +mod efi_utils; -impl DevicePath { - fn parse_from(bytes: &mut Cursor<&[u8]>) -> Result { - let ty = bytes.read_u8()?; - let subtype = bytes.read_u8()?; - - match (ty, subtype) { - (2, 1) => { - // ACPI Device Path - let size = bytes.read_u16::()?; - if size != 0xc { - bail!("ACPI Device Path size is wrong (was {:#04x}, not 0x000c)", size); - } - let hid = bytes.read_u32::().unwrap(); - let uid = bytes.read_u32::().unwrap(); - Ok(DevicePath::Acpi { hid, uid }) - } - (1, 1) => { - // PCI device path - let size = bytes.read_u16::()?; - if size != 0x6 { - bail!("PCI Device Path size is wrong (was {:#04x}, not 0x0006)", size); - } - let function = bytes.read_u8().unwrap(); - let device = bytes.read_u8().unwrap(); - Ok(DevicePath::Pci { device, function }) - } - (4, 6) => { - // "PIWG Firmware File" aka "Firmware File" in UEFI PI spec - let size = bytes.read_u16::()?; - if size != 0x14 { - bail!( - "Firmware File size is wrong (was {:#04x}, not 0x0014)", - size - ); - } - let mut guid = [0u8; 16]; - bytes.read_exact(&mut guid)?; - Ok(DevicePath::FirmwareFile { guid }) - } - (4, 7) => { - // "PIWG Firmware Volume" aka "Firmware Volume" in UEFI PI spec - let size = bytes.read_u16::()?; - if size != 0x14 { - bail!("Firmware Volume size is wrong (was {:#04x}, not 0x0014)", size); - } - let mut guid = [0u8; 16]; - bytes.read_exact(&mut guid)?; - Ok(DevicePath::FirmwareVolume { guid }) - } - (ty, subtype) => { - bail!( - "Device path type/subtype unsupported: ({:#02x}/{:#02x})", - ty, - subtype - ); - } - } - } -} - -impl EfiLoadOption { - // parsing here brought to you by rereading - // * https://uefi.org/specs/UEFI/2.10/10_Protocols_Device_Path_Protocol.html - // * https://uefi.org/specs/UEFI/2.10/03_Boot_Manager.html - fn parse_from(bytes: &mut Cursor<&[u8]>) -> Result { - let _attributes = bytes.read_u32::()?; - let file_path_list_length = bytes.read_u16::()?; - - // The `Description` field is a null-terminated string of char16. - let mut description_chars: Vec = Vec::new(); - - loop { - let c = bytes.read_u16::()?; - description_chars.push(c); - if c == 0 { - break; - } - } - - let description = String::from_utf16(&description_chars) - .expect("description is valid utf16"); - - let mut device_path_cursor = Cursor::new( - &bytes.get_ref()[bytes.position() as usize..] - [..file_path_list_length as usize], - ); - - let path_entry = DevicePath::parse_from(&mut device_path_cursor) - .expect("can read device path element"); - let load_path = match path_entry { - acpi_root @ DevicePath::Acpi { .. } => { - let pci_device = - DevicePath::parse_from(&mut device_path_cursor) - .expect("can read device path element"); - if !matches!(pci_device, DevicePath::Pci { .. }) { - bail!("expected ACPI Device Path entry to be followed by a PCI Device Path, but was {:?}", pci_device); - } - - EfiLoadPath::Device { acpi_root, pci_device } - } - volume @ DevicePath::FirmwareVolume { .. } => { - let file = DevicePath::parse_from(&mut device_path_cursor) - .expect("can read device path element"); - if !matches!(file, DevicePath::FirmwareFile { .. }) { - bail!("expected Firmware Volume entry to be followed by a Firmware File, but was {:?}", file); - } - - EfiLoadPath::FirmwareFile { volume, file } - } - other => { - bail!("unexpected root EFI Load Option path item: {:?}", other); - } - }; - - // Not strictly necessary, but advance `bytes` by the number of bytes we read from - // `device_path_cursor`. To callers, this keeps it as if we had just been reading `bytes` - // all along. - bytes.set_position(bytes.position() + device_path_cursor.position()); - - Ok(EfiLoadOption { description, path: load_path }) - } - - fn pci_device_function(&self) -> (u8, u8) { - let EfiLoadPath::Device { - pci_device: DevicePath::Pci { device, function }, - .. - } = self.path - else { - panic!( - "expected load path to be an ACPI/PCI pair, but was {:?}", - self.path - ); - }; - (device, function) - } -} - -fn unhex(s: &str) -> Vec { - let s = s.replace("\n", ""); - let mut res = Vec::new(); - for chunk in s.as_bytes().chunks(2) { - assert_eq!(chunk.len(), 2); - - let s = std::str::from_utf8(chunk).expect("valid string"); - - let b = u8::from_str_radix(s, 16).expect("can parse"); - - res.push(b); - } - return res; -} +use efi_utils::{ + bootvar, discover_boot_option_numbers, efipath, find_option_in_boot_order, + read_efivar, remove_boot_entry, write_efivar, EfiLoadOption, + BOOT_CURRENT_VAR, BOOT_ORDER_VAR, EDK2_EFI_SHELL_GUID, + EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID, +}; -async fn run_long_command( +pub(crate) async fn run_long_command( vm: &phd_framework::TestVm, cmd: &str, ) -> Result { @@ -315,74 +49,6 @@ async fn run_long_command( vm.run_shell_command(". cmd").await } -/// Read the EFI variable `varname` from inside the VM, and return the data therein as a byte -/// array. -async fn read_efivar( - vm: &phd_framework::TestVm, - varname: &str, -) -> Result, Error> { - // Linux's `efivarfs` prepends 4 bytes of attributes to EFI variables. - let cmd = format!( - "dd status=none if={} bs=1 skip=4 | xxd -p -", - efipath(varname) - ); - - let hex = run_long_command(vm, &cmd).await?; - - Ok(unhex(&hex)) -} - -/// Write the provided bytes to the EFI variable `varname`. -/// -/// For Linux guests: variables automatically have their prior attributes prepended. Provide only -/// the variable's data. -async fn write_efivar( - vm: &phd_framework::TestVm, - varname: &str, - data: &[u8], -) -> Result<(), Error> { - let attr_cmd = format!( - "dd status=none if={} bs=1 count=4 | xxd -p -", - efipath(varname) - ); - - let attr_read_bytes = run_long_command(vm, &attr_cmd).await?; - let attrs = if attr_read_bytes.ends_with(": No such file or directory") { - // Default attributes if the variable does not exist yet. We expect it to be non-volatile - // because we are writing it, we expect it to be available to boot services (not strictly - // required, but for boot configuration we need it), and we expect it to be available at - // runtime (e.g. where we are reading and writing it). - // - // so: - // NON_VOLATILE | BOOTSERVICE_ACCESS | RUNTIME_ACCESS - const FRESH_ATTRS: u32 = 0x00_00_00_07; - FRESH_ATTRS.to_le_bytes().to_vec() - } else { - unhex(&attr_read_bytes) - }; - - let mut new_value = attrs; - new_value.extend_from_slice(data); - - // The command to write this data back out will be, roughtly: - // ``` - // printf "\xAA\xAA\xAA\xAA\xDD\xDD\xDD\xDD" > /sys/firmware/efi/efivars/... - // ``` - // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided data. - let escaped: String = - new_value.into_iter().map(|b| format!("\\x{:02x}", b)).collect(); - - let cmd = format!("printf \"{}\" > {}", escaped, efipath(varname)); - - let res = run_long_command(vm, &cmd).await?; - // If something went sideways and the write failed with something like `invalid argument`... - if res.len() != 0 { - bail!("writing efi produced unexpected output: {}", res); - } - - Ok(()) -} - #[phd_testcase] async fn configurable_boot_order(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("configurable_boot_order"); diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs new file mode 100644 index 000000000..7572d9c14 --- /dev/null +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -0,0 +1,492 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! EFI variable parsing and manipulation utilities. +//! +//! Conceptually, this would be a separate crate. Something like `uefi`, or maybe more accurately, +//! `uefi-raw`. Those crates are very oriented towards *being* the platform firmware though - it's +//! not clear how to use them to parse a boot option into a device path, for example, though they +//! clearly are able to support processing device paths. +//! +//! So instead, this is enough supporting logic for our tests in Propolis. + +use anyhow::{bail, Error}; +use byteorder::{LittleEndian, ReadBytesExt}; +use phd_testcase::*; +use std::collections::HashMap; +use std::io::{Cursor, Read}; +use tracing::{info, warn}; + +use super::run_long_command; + +// First, some GUIDs. These GUIDs come from EDK2, and OVMF reuses them. Notably these are the raw +// bytes of the GUID: textual values will have slightly different ordering of bytes. +// +// Some source references, as you won't find these GUIDs in a UEFI or related spec document.. The +// firmware volume is identified by what seems to be the DXE firmware volume: +// https://github.com/tianocore/edk2/blob/712797c/OvmfPkg/OvmfPkgIa32.fdf#L181 +// introduced in +// https://github.com/tianocore/edk2/commit/16f26de663967b5a64140b6abba2c145ea50194c, note this +// is the DXEFV entry. +// +// The *files* we'll care about in this test are identified by other GUIDs in the above *volume*. +// +// EFI Internal Shell: https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 +// UiApp: +// https://github.com/tianocore/edk2/blob/a445e1a/MdeModulePkg/Application/UiApp/UiApp.inf#L13 +pub(crate) const EDK2_FIRMWARE_VOL_GUID: &'static [u8; 16] = &[ + 0xc9, 0xbd, 0xb8, 0x7c, 0xeb, 0xf8, 0x34, 0x4f, 0xaa, 0xea, 0x3e, 0xe4, + 0xaf, 0x65, 0x16, 0xa1, +]; +pub(crate) const EDK2_UI_APP_GUID: &'static [u8; 16] = &[ + 0x21, 0xaa, 0x2c, 0x46, 0x14, 0x76, 0x03, 0x45, 0x83, 0x6e, 0x8a, 0xb6, + 0xf4, 0x66, 0x23, 0x31, +]; +pub(crate) const EDK2_EFI_SHELL_GUID: &'static [u8; 16] = &[ + 0x83, 0xa5, 0x04, 0x7c, 0x3e, 0x9e, 0x1c, 0x4f, 0xad, 0x65, 0xe0, 0x52, + 0x68, 0xd0, 0xb4, 0xd1, +]; + +// The variable namespace `8be4df61-93ca-11d2-aa0d-00e098032b8c` comes from UEFI, as do the +// variable names here. The presentation as `{varname}-{namespace}`, and at a path like +// `/sys/firmware/efi/efivars/`, are both Linux `efivars`-isms. +// +// These tests likely will not pass when run with other guest OSes. +pub(crate) const BOOT_CURRENT_VAR: &'static str = + "BootCurrent-8be4df61-93ca-11d2-aa0d-00e098032b8c"; +pub(crate) const BOOT_ORDER_VAR: &'static str = + "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c"; + +pub(crate) fn bootvar(num: u16) -> String { + format!("Boot{:04X}-8be4df61-93ca-11d2-aa0d-00e098032b8c", num) +} + +pub(crate) fn efipath(varname: &str) -> String { + format!("/sys/firmware/efi/efivars/{}", varname) +} + +/// A (very limited) parse of an `EFI_LOAD_OPTION` descriptor. +#[derive(Debug)] +pub(crate) struct EfiLoadOption { + pub description: String, + pub path: EfiLoadPath, +} + +#[derive(Debug)] +pub(crate) enum EfiLoadPath { + Device { acpi_root: DevicePath, pci_device: DevicePath }, + FirmwareFile { volume: DevicePath, file: DevicePath }, +} + +impl EfiLoadPath { + pub fn matches_fw_file( + &self, + fw_vol: &[u8; 16], + fw_file: &[u8; 16], + ) -> bool { + if let EfiLoadPath::FirmwareFile { + volume: DevicePath::FirmwareVolume { guid: vol_gid }, + file: DevicePath::FirmwareFile { guid: vol_file }, + } = self + { + vol_gid == fw_vol && vol_file == fw_file + } else { + false + } + } + + pub fn matches_pci_device_function( + &self, + pci_device: u8, + pci_function: u8, + ) -> bool { + if let EfiLoadPath::Device { + acpi_root: DevicePath::Acpi { .. }, + pci_device: DevicePath::Pci { device, function }, + } = self + { + pci_device == *device && pci_function == *function + } else { + false + } + } + + pub fn as_pci_device_function(&self) -> Option<(u8, u8)> { + if let EfiLoadPath::Device { + acpi_root: DevicePath::Acpi { .. }, + pci_device: DevicePath::Pci { device, function }, + } = self + { + Some((*device, *function)) + } else { + None + } + } +} + +#[allow(dead_code)] +// The `Acpi` fields are not explicitly used (yet), but are useful for `Debug` purposes. +#[derive(Debug, Clone, Copy)] +pub(crate) enum DevicePath { + Acpi { hid: u32, uid: u32 }, + Pci { device: u8, function: u8 }, + + // These two are described in sections 8.2 and 8.3 of the UEFI PI spec, respectively. + // Version 1.6 can be found at https://uefi.org/sites/default/files/resources/PI_Spec_1.6.pdf + FirmwareVolume { guid: [u8; 16] }, + FirmwareFile { guid: [u8; 16] }, +} + +impl DevicePath { + fn parse_from(bytes: &mut Cursor<&[u8]>) -> Result { + let ty = bytes.read_u8()?; + let subtype = bytes.read_u8()?; + + match (ty, subtype) { + (2, 1) => { + // ACPI Device Path + let size = bytes.read_u16::()?; + if size != 0xc { + bail!("ACPI Device Path size is wrong (was {:#04x}, not 0x000c)", size); + } + let hid = bytes.read_u32::().unwrap(); + let uid = bytes.read_u32::().unwrap(); + Ok(DevicePath::Acpi { hid, uid }) + } + (1, 1) => { + // PCI device path + let size = bytes.read_u16::()?; + if size != 0x6 { + bail!("PCI Device Path size is wrong (was {:#04x}, not 0x0006)", size); + } + let function = bytes.read_u8().unwrap(); + let device = bytes.read_u8().unwrap(); + Ok(DevicePath::Pci { device, function }) + } + (4, 6) => { + // "PIWG Firmware File" aka "Firmware File" in UEFI PI spec + let size = bytes.read_u16::()?; + if size != 0x14 { + bail!( + "Firmware File size is wrong (was {:#04x}, not 0x0014)", + size + ); + } + let mut guid = [0u8; 16]; + bytes.read_exact(&mut guid)?; + Ok(DevicePath::FirmwareFile { guid }) + } + (4, 7) => { + // "PIWG Firmware Volume" aka "Firmware Volume" in UEFI PI spec + let size = bytes.read_u16::()?; + if size != 0x14 { + bail!("Firmware Volume size is wrong (was {:#04x}, not 0x0014)", size); + } + let mut guid = [0u8; 16]; + bytes.read_exact(&mut guid)?; + Ok(DevicePath::FirmwareVolume { guid }) + } + (ty, subtype) => { + bail!( + "Device path type/subtype unsupported: ({:#02x}/{:#02x})", + ty, + subtype + ); + } + } + } +} + +impl EfiLoadOption { + // parsing here brought to you by rereading + // * https://uefi.org/specs/UEFI/2.10/10_Protocols_Device_Path_Protocol.html + // * https://uefi.org/specs/UEFI/2.10/03_Boot_Manager.html + pub(crate) fn parse_from( + bytes: &mut Cursor<&[u8]>, + ) -> Result { + let _attributes = bytes.read_u32::()?; + let file_path_list_length = bytes.read_u16::()?; + + // The `Description` field is a null-terminated string of char16. + let mut description_chars: Vec = Vec::new(); + + loop { + let c = bytes.read_u16::()?; + description_chars.push(c); + if c == 0 { + break; + } + } + + let description = String::from_utf16(&description_chars) + .expect("description is valid utf16"); + + let mut device_path_cursor = Cursor::new( + &bytes.get_ref()[bytes.position() as usize..] + [..file_path_list_length as usize], + ); + + let path_entry = DevicePath::parse_from(&mut device_path_cursor) + .map_err(|e| { + anyhow::anyhow!("unable to parse device path element: {:?}", e) + })?; + let load_path = match path_entry { + acpi_root @ DevicePath::Acpi { .. } => { + let pci_device = + DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + if !matches!(pci_device, DevicePath::Pci { .. }) { + bail!("expected ACPI Device Path entry to be followed by a PCI Device Path, but was {:?}", pci_device); + } + + EfiLoadPath::Device { acpi_root, pci_device } + } + volume @ DevicePath::FirmwareVolume { .. } => { + let file = DevicePath::parse_from(&mut device_path_cursor) + .expect("can read device path element"); + if !matches!(file, DevicePath::FirmwareFile { .. }) { + bail!("expected Firmware Volume entry to be followed by a Firmware File, but was {:?}", file); + } + + EfiLoadPath::FirmwareFile { volume, file } + } + other => { + bail!("unexpected root EFI Load Option path item: {:?}", other); + } + }; + + // Not strictly necessary, but advance `bytes` by the number of bytes we read from + // `device_path_cursor`. To callers, this keeps it as if we had just been reading `bytes` + // all along. + bytes.set_position(bytes.position() + device_path_cursor.position()); + + Ok(EfiLoadOption { description, path: load_path }) + } + + pub fn pci_device_function(&self) -> (u8, u8) { + let EfiLoadPath::Device { + pci_device: DevicePath::Pci { device, function }, + .. + } = self.path + else { + panic!( + "expected load path to be an ACPI/PCI pair, but was {:?}", + self.path + ); + }; + (device, function) + } +} + +fn unhex(s: &str) -> Vec { + let s = s.replace("\n", ""); + eprintln!("unhexing {}", s); + let mut res = Vec::new(); + for chunk in s.as_bytes().chunks(2) { + assert_eq!(chunk.len(), 2); + + let s = std::str::from_utf8(chunk).expect("valid string"); + + let b = u8::from_str_radix(s, 16).expect("can parse"); + + res.push(b); + } + return res; +} + +/// Read the EFI variable `varname` from inside the VM, and return the data therein as a byte +/// array. +pub(crate) async fn read_efivar( + vm: &phd_framework::TestVm, + varname: &str, +) -> Result, Error> { + // Linux's `efivarfs` prepends 4 bytes of attributes to EFI variables. + let cmd = format!( + "dd status=none if={} bs=1 skip=4 | xxd -p -", + efipath(varname) + ); + + let hex = run_long_command(vm, &cmd).await?; + + Ok(unhex(&hex)) +} + +/// Write the provided bytes to the EFI variable `varname`. +/// +/// For Linux guests: variables automatically have their prior attributes prepended. Provide only +/// the variable's data. +pub(crate) async fn write_efivar( + vm: &phd_framework::TestVm, + varname: &str, + data: &[u8], +) -> Result<(), Error> { + let attr_cmd = format!( + "dd status=none if={} bs=1 count=4 | xxd -p -", + efipath(varname) + ); + + let attr_read_bytes = run_long_command(vm, &attr_cmd).await?; + let attrs = if attr_read_bytes.ends_with(": No such file or directory") { + // Default attributes if the variable does not exist yet. We expect it to be non-volatile + // because we are writing it, we expect it to be available to boot services (not strictly + // required, but for boot configuration we need it), and we expect it to be available at + // runtime (e.g. where we are reading and writing it). + // + // so: + // NON_VOLATILE | BOOTSERVICE_ACCESS | RUNTIME_ACCESS + const FRESH_ATTRS: u32 = 0x00_00_00_07; + FRESH_ATTRS.to_le_bytes().to_vec() + } else { + unhex(&attr_read_bytes) + }; + + let mut new_value = attrs; + new_value.extend_from_slice(data); + + // The command to write this data back out will be, roughtly: + // ``` + // printf "\xAA\xAA\xAA\xAA\xDD\xDD\xDD\xDD" > /sys/firmware/efi/efivars/... + // ``` + // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided data. + let escaped: String = + new_value.into_iter().map(|b| format!("\\x{:02x}", b)).collect(); + + let cmd = format!("printf \"{}\" > {}", escaped, efipath(varname)); + + let res = run_long_command(vm, &cmd).await?; + // If something went sideways and the write failed with something like `invalid argument`... + if res.len() != 0 { + bail!("writing efi produced unexpected output: {}", res); + } + + Ok(()) +} + +/// Learn the boot option numbers associated with various boot options that may or should +/// exist. +/// +/// The fundamental wrinkle here is that we don't necessarily know what `Boot####` entries +/// exist, or which numbers they have, because NvVar is handled through persistence in guest +/// disks. This means a guest image may have some prior NvVar state with `Boot####` entries +/// that aren't removed, and cause entries reflecting the current system to have later numbers +/// than a fully blank initial set of variables. +pub(crate) async fn discover_boot_option_numbers( + vm: &phd_framework::TestVm, + device_names: &[((u8, u8), &'static str)], +) -> Result> { + let mut option_mappings: HashMap = HashMap::new(); + + let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; + warn!("initial boot order var: {:?}", boot_order_bytes); + + for chunk in boot_order_bytes.chunks(2) { + assert_eq!(chunk.len(), 2); + let option_num = u16::from_le_bytes(chunk.try_into().unwrap()); + + let option_bytes = read_efivar(&vm, &bootvar(option_num)).await?; + + let mut cursor = Cursor::new(option_bytes.as_slice()); + + let load_option = match EfiLoadOption::parse_from(&mut cursor) { + Ok(option) => option, + Err(e) => { + warn!("unhandled boot option: {:?}", e); + continue; + } + }; + + if load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_UI_APP_GUID) + { + let prev = option_mappings.insert("uiapp".to_string(), option_num); + assert_eq!(prev, None); + } else if load_option + .path + .matches_fw_file(EDK2_FIRMWARE_VOL_GUID, EDK2_EFI_SHELL_GUID) + { + let prev = + option_mappings.insert("efi shell".to_string(), option_num); + assert_eq!(prev, None); + } else if let Some((device, function)) = + load_option.path.as_pci_device_function() + { + let description = device_names.iter().find_map(|(path, desc)| { + if path.0 == device && path.1 == function { + Some(desc) + } else { + None + } + }); + + if let Some(description) = description { + option_mappings.insert(description.to_string(), option_num); + } else { + warn!("unknown PCI boot device {:#x}.{:#x}", device, function); + } + } else { + warn!("unknown boot option: {:?}", load_option); + + let prev = option_mappings + .insert(load_option.description.to_string(), option_num); + assert_eq!(prev, None); + } + } + + info!("found boot options: {:?}", option_mappings); + + Ok(option_mappings) +} + +pub(crate) fn find_option_in_boot_order( + order: &[u8], + option: u16, +) -> Option { + let option = option.to_le_bytes(); + order + .chunks(2) + .enumerate() + .find(|(_i, chunk)| *chunk == option) + .map(|(i, _chunk)| i) +} + +/// Remove the boot option from `vm`'s EFI BootOrder variable. `boot_option_num` is assumed to +/// refer to a boot option named like `format!("Boot{boot_option_num:4X}-*")`. +/// +/// If the boot order was actually modified, returns the index that `boot_option_num` was +/// removed at. +pub(crate) async fn remove_boot_entry( + vm: &phd_framework::TestVm, + boot_option_num: u16, +) -> Result> { + let mut without_option = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + let Some(option_idx) = + find_option_in_boot_order(&without_option, boot_option_num) + else { + return Ok(None); + }; + + info!( + "Removing Boot{:4X} from the boot order. It was at index {}", + boot_option_num, option_idx + ); + + without_option.remove(option_idx * 2); + without_option.remove(option_idx * 2); + + // Technically it's fine if an option is present multiple times, but typically an option is + // present only once. This function intends to remove all copies of the specified option, + // so assert that we have done so in the new order. + assert_eq!( + find_option_in_boot_order(&without_option, boot_option_num), + None + ); + + write_efivar(&vm, BOOT_ORDER_VAR, &without_option).await?; + + Ok(Some(option_idx)) +} + +// And finally, actual test cases using the above! From 7ddf7355fcaf6cc2fd9f60fac721cd102ad03eb9 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 12 Sep 2024 07:14:53 +0000 Subject: [PATCH 08/42] lose dependency on specific test images --- phd-tests/tests/src/boot_order.rs | 62 +++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index a61177b82..d117d5f19 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -49,26 +49,72 @@ pub(crate) async fn run_long_command( vm.run_shell_command(". cmd").await } +// This test checks that with a specified boot order, the guest boots whichever disk we wanted to +// come first. This is simple enough, until you want to know "what you booted from".. +// +// For live CDs, such as Alpine's, the system boots into a tmpfs loaded from a boot disk, but +// there's no clear line to what disk that live image *came from*. If you had two Alpine 3.20.3 +// images attached to one VM, you'd ceretainly boot into Alpine 3.20.3, but I don't see a way to +// tell *which disk* that Alpine would be sourced from, from Alpine alone. +// +// So instead, check EFI variables. To do this, then, we have to.. parse EFI variables. That is +// what this test does below, but it's probably not fully robust to what we might do with PCI +// devices in the future. +// +// A more "future-proof" setup might be to just boot an OS, see that we ended up in the OS we +// expected, and check some attribute about it like that the kernel version is what we expected the +// booted OS to be. That's still a good fallback if we discover that parsing EFI variables is +// difficult to stick with for any reason. It has a downside though: we'd have to keep a specific +// image around with a specific kernel version as either the "we expect to boot into this" image +// or the "we expected to boot into not this" cases. +// +// The simplest case: show that we can configure the guest's boot order from outside the machine. +// This is the most likely common case, where Propolis is told what the boot order should be by +// Nexus and we simply make it happen. +// +// Unlike later tests, this test does not manipulate boot configuration from inside the guest OS. #[phd_testcase] async fn configurable_boot_order(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("configurable_boot_order"); - cfg.data_disk( - "alpine-3-20", - DiskSource::Artifact("alpine-3-20"), + cfg.boot_disk( + ctx.default_guest_os_artifact(), DiskInterface::Virtio, DiskBackend::File, - 16, + 4, ); + // Create a second disk backed by the same guest OS. This way we'll boot to the same + // environment regardless of which disk is used; we'll check EFI variables to figure out if the + // right disk was booted. cfg.data_disk( - "alpine-3-16", - DiskSource::Artifact("alpine-3-16"), + "alt-boot", + DiskSource::Artifact(ctx.default_guest_os_artifact()), DiskInterface::Virtio, DiskBackend::File, 24, ); - cfg.boot_order(vec!["alpine-3-16", "alpine-3-20"]); + // We haven't specified a boot order. So, we'll expect that we boot to the lower-numbered PCI + // device (4) and end up in Alpine 3.20. + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_num_bytes = read_efivar(&vm, BOOT_CURRENT_VAR).await?; + + let boot_num: u16 = u16::from_le_bytes(boot_num_bytes.try_into().unwrap()); + + let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + + let mut cursor = Cursor::new(boot_option_bytes.as_slice()); + + let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); + + assert!(load_option.path.matches_pci_device_function(4, 0)); + + // Now specify a boot order and do the whole thing again. Note that this order puts the later + // PCI device first, so this changes the boot order! + cfg.boot_order(vec!["alt-boot", "boot-disk"]); let mut vm = ctx.spawn_vm(&cfg, None).await?; vm.launch().await?; @@ -87,7 +133,7 @@ async fn configurable_boot_order(ctx: &Framework) { // If we were going to test the PCI bus number too, we'd check the AHCI Device Path entry that // precedes these PCI values. But we only use PCI bus 0 today, and the mapping from an AHCI // Device Path to a PCI root is not immediately obvious? - assert!(load_option.path.matches_pci_device_function(16, 0)); + assert!(load_option.path.matches_pci_device_function(24, 0)); } // This is very similar to the `in_memory_backend_smoke_test` test, but specifically asserts that From b112aa681eea778b171061d0921d7b9a4867c952 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Sep 2024 02:41:10 +0000 Subject: [PATCH 09/42] fill out other half of test "boot_order_source_priority" --- phd-tests/tests/src/boot_order.rs | 121 +++++++++++++++++++- phd-tests/tests/src/boot_order/efi_utils.rs | 2 - 2 files changed, 119 insertions(+), 4 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index d117d5f19..753ec4db7 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -381,5 +381,122 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { assert_eq!(boot_option_bytes, boot_option_bytes_after_reboot); } -// and then one more test about what happens if configure the boot order in the guest, stop the VM, -// change the configured boot order, and start the VM again... +// This test is less demonstrating specific desired behavior, and more the observed behavior of +// OVMF with configuration we can offer today. If Propolis or other changes break this test, the +// test may well be what needs changing. +// +// If a `bootorder` file is present in fwcfg, there two relevant consequences demonstrated here: +// * The order of devices in `bootorder` is the order that will be used; on reboot any persisted +// configuration will be replaced with one derived from `bootorder` and corresponding OVMF logic. +// * Guests cannot meaningfully change boot order. If an entry is in `bootorder`, that determines +// its' order. If it is not in `bootorder` but is retained for booting, it is appended to the end +// of the boot order in what seems to be the order that OVMF discovers the device. +// +// If `bootorder` is removed for subsequent reboots, the EFI System Partition's store of NvVar +// variables is the source of boot order, and guests can control their boot fates. +#[phd_testcase] +async fn boot_order_source_priority(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("boot_order_source_priority"); + + cfg.data_disk( + "unbootable", + DiskSource::FatFilesystem(FatFilesystem::new()), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 16, + ); + + cfg.data_disk( + "unbootable-2", + DiskSource::FatFilesystem(FatFilesystem::new()), + DiskInterface::Virtio, + DiskBackend::InMemory { readonly: true }, + 20, + ); + + let mut vm_no_bootorder = ctx.spawn_vm(&cfg, None).await?; + vm_no_bootorder.launch().await?; + vm_no_bootorder.wait_to_boot().await?; + + // If the guest doesn't have an EFI partition then there's no way for boot order preferences to + // be persisted. + let mountline = vm_no_bootorder.run_shell_command("mount | grep /boot/efi").await?; + + if !mountline.contains(" on /boot/efi type vfat") { + warn!("guest doesn't have an EFI partition, cannot manage boot order"); + } + + let boot_option_numbers = discover_boot_option_numbers( + &vm_no_bootorder, + &[ + ((4, 0), "boot-disk"), + ((16, 0), "unbootable"), + ((20, 0), "unbootable-2"), + ], + ) + .await?; + + // `unbootable` should be somewhere in the middle of the boot order: definitely between + // `boot-disk` and `unbootable-2`, for the options enumerated from PCI devices. + let unbootable_num = boot_option_numbers["unbootable"]; + + let unbootable_idx = remove_boot_entry(&vm_no_bootorder, unbootable_num) + .await? + .expect("unbootable was in the boot order"); + + vm_no_bootorder.run_shell_command("reboot").await?; + vm_no_bootorder.wait_to_boot().await?; + + let reloaded_order = read_efivar(&vm_no_bootorder, BOOT_ORDER_VAR).await?; + + // Somewhat unexpected, but where OVMF gets us: `unbootable` is back in the boot order, but at + // the end of the list. One might hope it would be entirely removed from the boot order now, + // but no such luck. The good news is that we can in fact influence the boot order. + let unbootable_idx_after_reboot = + find_option_in_boot_order(&reloaded_order, unbootable_num) + .expect("unbootable is back in the order"); + + let last_boot_option = &reloaded_order[reloaded_order.len() - 2..]; + assert_eq!(last_boot_option, &unbootable_num.to_le_bytes()); + + // But this new position for `unbootable` definitely should be different from before. + assert_ne!(unbootable_idx, unbootable_idx_after_reboot); + + // And if we do the whole dance again with an explicit boot order provided to the guest, we'll + // get different results! + drop(vm_no_bootorder); + cfg.boot_order(vec!["boot-disk", "unbootable", "unbootable-2"]); + + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let boot_option_numbers = discover_boot_option_numbers( + &vm, + &[ + ((4, 0), "boot-disk"), + ((16, 0), "unbootable"), + ((20, 0), "unbootable-2"), + ], + ) + .await?; + + let unbootable_num = boot_option_numbers["unbootable"]; + + // Try removing a fw_cfg-defined boot option. + let unbootable_idx = remove_boot_entry(&vm, unbootable_num) + .await? + .expect("unbootable was in the boot order"); + + vm.run_shell_command("reboot").await?; + vm.wait_to_boot().await?; + + let reloaded_order = read_efivar(&vm, BOOT_ORDER_VAR).await?; + + // The option will be back in the boot order, where it was before! This is because fwcfg still + // has a `bootorder` file. + assert_eq!( + find_option_in_boot_order(&reloaded_order, unbootable_num), + Some(unbootable_idx) + ); +} diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index 7572d9c14..0c8aaecaf 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -488,5 +488,3 @@ pub(crate) async fn remove_boot_entry( Ok(Some(option_idx)) } - -// And finally, actual test cases using the above! From f0a09db74f307cb4d1a1b64b2bb575982f17013a Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Sep 2024 02:41:56 +0000 Subject: [PATCH 10/42] use the implicit boot-disk entry like every other test --- phd-tests/tests/src/boot_order.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index 753ec4db7..caddbb845 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -76,16 +76,10 @@ pub(crate) async fn run_long_command( #[phd_testcase] async fn configurable_boot_order(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("configurable_boot_order"); - cfg.boot_disk( - ctx.default_guest_os_artifact(), - DiskInterface::Virtio, - DiskBackend::File, - 4, - ); - // Create a second disk backed by the same guest OS. This way we'll boot to the same - // environment regardless of which disk is used; we'll check EFI variables to figure out if the - // right disk was booted. + // Create a second disk backed by the same artifact as the default `boot-disk`. This way we'll + // boot to the same environment regardless of which disk is used; we'll check EFI variables to + // figure out if the right disk was booted. cfg.data_disk( "alt-boot", DiskSource::Artifact(ctx.default_guest_os_artifact()), From ed4a3e89f18a35146f12953f6dc3c25373787f37 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Sep 2024 02:42:34 +0000 Subject: [PATCH 11/42] use empty FAT filesystems for unbootable disks don't *really* need the file present for any test purpose --- phd-tests/tests/src/boot_order.rs | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index caddbb845..efc8f2e09 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -135,14 +135,11 @@ async fn configurable_boot_order(ctx: &Framework) { // respected and a non-bootable disk does not wedge startup. #[phd_testcase] async fn unbootable_disk_skipped(ctx: &Framework) { - const HELLO_MSG: &str = "hello oxide!"; - let mut cfg = ctx.vm_config_builder("unbootable_disk_skipped"); - let mut unbootable = FatFilesystem::new(); - unbootable.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; + cfg.data_disk( "unbootable", - DiskSource::FatFilesystem(unbootable), + DiskSource::FatFilesystem(FatFilesystem::new()), DiskInterface::Virtio, DiskBackend::InMemory { readonly: true }, 16, @@ -253,30 +250,19 @@ async fn unbootable_disk_skipped(ctx: &Framework) { // Start with the boot order being `["boot-disk", "unbootable"]`, then change it so that next boot // we'll boot from `unbootable` first. Then reboot and verify that the boot order is still // "boot-disk" first. -// -// TODO(ixi): This would be a bit nicer if there was a secondary image to confirm we've booted into -// instead... #[phd_testcase] async fn guest_can_adjust_boot_order(ctx: &Framework) { - const HELLO_MSG: &str = "hello oxide!"; - let mut cfg = ctx.vm_config_builder("guest_can_adjust_boot_order"); - let mut unbootable = FatFilesystem::new(); - unbootable.add_file_from_str("hello_oxide.txt", HELLO_MSG)?; - cfg.boot_disk("ubuntu-noble", DiskInterface::Virtio, DiskBackend::File, 4); cfg.data_disk( "unbootable", - DiskSource::FatFilesystem(unbootable), + DiskSource::FatFilesystem(FatFilesystem::new()), DiskInterface::Virtio, DiskBackend::InMemory { readonly: true }, 16, ); - // `boot-disk` is the implicitly-created boot disk made from the default guest OS artifact. - // - // explicitly boot from it later, so OVMF has to try and fail to boot `unbootable`. - cfg.boot_order(vec!["boot-disk"]); //, "unbootable"]); + cfg.boot_order(vec!["boot-disk", "unbootable"]); let mut vm = ctx.spawn_vm(&cfg, None).await?; vm.launch().await?; From 7484eb4087c34cf59a172772f6cff3cb45c07f0c Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Sep 2024 02:42:45 +0000 Subject: [PATCH 12/42] log formatting consistency --- phd-tests/tests/src/boot_order/efi_utils.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index 0c8aaecaf..c42714304 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -378,7 +378,7 @@ pub(crate) async fn discover_boot_option_numbers( let mut option_mappings: HashMap = HashMap::new(); let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; - warn!("initial boot order var: {:?}", boot_order_bytes); + info!("Initial boot order var: {:?}", boot_order_bytes); for chunk in boot_order_bytes.chunks(2) { assert_eq!(chunk.len(), 2); @@ -391,7 +391,7 @@ pub(crate) async fn discover_boot_option_numbers( let load_option = match EfiLoadOption::parse_from(&mut cursor) { Ok(option) => option, Err(e) => { - warn!("unhandled boot option: {:?}", e); + warn!("Unhandled boot option: {:?}", e); continue; } }; @@ -423,10 +423,10 @@ pub(crate) async fn discover_boot_option_numbers( if let Some(description) = description { option_mappings.insert(description.to_string(), option_num); } else { - warn!("unknown PCI boot device {:#x}.{:#x}", device, function); + warn!("Unknown PCI boot device {:#x}.{:#x}", device, function); } } else { - warn!("unknown boot option: {:?}", load_option); + warn!("Unknown boot option: {:?}", load_option); let prev = option_mappings .insert(load_option.description.to_string(), option_num); @@ -434,7 +434,7 @@ pub(crate) async fn discover_boot_option_numbers( } } - info!("found boot options: {:?}", option_mappings); + info!("Found boot options: {:?}", option_mappings); Ok(option_mappings) } From ed455ea695be0b6b03a7e39212ae6ec5b84b9fdc Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 01:59:59 +0000 Subject: [PATCH 13/42] review feedback: stray eprintln, include new boot_order in api-types also rustfmt has different opinions now? --- bin/propolis-cli/src/main.rs | 2 ++ bin/propolis-server/src/lib/initializer.rs | 23 +++++++++------------ crates/propolis-api-types/src/lib.rs | 8 +++++++ openapi/propolis-server.json | 7 +++++++ phd-tests/tests/src/boot_order.rs | 5 +++-- phd-tests/tests/src/boot_order/efi_utils.rs | 2 +- 6 files changed, 31 insertions(+), 16 deletions(-) diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index ebf010858..78b6caac5 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -266,6 +266,7 @@ async fn new_instance( // TODO: Allow specifying NICs nics: vec![], disks, + boot_order: None, migrate: None, cloud_init_bytes, }; @@ -517,6 +518,7 @@ async fn migrate_instance( // TODO: Handle migrating NICs nics: vec![], disks, + boot_order: None, migrate: Some(InstanceMigrateInitiateRequest { migration_id: Uuid::new_v4(), src_addr: src_addr.to_string(), diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 8ccab8960..ddf4e770c 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1130,19 +1130,16 @@ impl<'a> MachineInitializer<'a> { &vnic_spec.backend_name }; - if let Some(device_spec) = self - .spec - .devices - .storage_devices - .iter() - .find_map(|(_name, spec)| { - eprintln!("checking name \"{}\", spec {:?}", _name, spec); - if storage_backend(spec) == &boot_entry.name { - Some(spec) - } else { - None - } - }) + if let Some(device_spec) = + self.spec.devices.storage_devices.iter().find_map( + |(_name, spec)| { + if storage_backend(spec) == &boot_entry.name { + Some(spec) + } else { + None + } + }, + ) { match device_spec { instance_spec::v0::StorageDeviceV0::VirtioDisk(disk) => { diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 3afe4b530..b5c1a7889 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -52,6 +52,9 @@ pub struct InstanceEnsureRequest { #[serde(default)] pub disks: Vec, + #[serde(default)] + pub boot_order: Option, + pub migrate: Option, // base64 encoded cloud-init ISO @@ -385,6 +388,11 @@ pub struct DiskAttachment { pub state: DiskAttachmentState, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct BootDeclaration { + pub name: String, +} + /// A stable index which is translated by Propolis /// into a PCI BDF, visible to the guest. #[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index e2ec407b9..ab4c176dc 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -863,6 +863,13 @@ "$ref": "#/components/schemas/DiskRequest" } }, + "boot_order": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } + }, "migrate": { "nullable": true, "allOf": [ diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index efc8f2e09..751a8c2ea 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -400,7 +400,8 @@ async fn boot_order_source_priority(ctx: &Framework) { // If the guest doesn't have an EFI partition then there's no way for boot order preferences to // be persisted. - let mountline = vm_no_bootorder.run_shell_command("mount | grep /boot/efi").await?; + let mountline = + vm_no_bootorder.run_shell_command("mount | grep /boot/efi").await?; if !mountline.contains(" on /boot/efi type vfat") { warn!("guest doesn't have an EFI partition, cannot manage boot order"); @@ -434,7 +435,7 @@ async fn boot_order_source_priority(ctx: &Framework) { // but no such luck. The good news is that we can in fact influence the boot order. let unbootable_idx_after_reboot = find_option_in_boot_order(&reloaded_order, unbootable_num) - .expect("unbootable is back in the order"); + .expect("unbootable is back in the order"); let last_boot_option = &reloaded_order[reloaded_order.len() - 2..]; assert_eq!(last_boot_option, &unbootable_num.to_le_bytes()); diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index c42714304..0264140ca 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -213,10 +213,10 @@ impl EfiLoadOption { loop { let c = bytes.read_u16::()?; - description_chars.push(c); if c == 0 { break; } + description_chars.push(c); } let description = String::from_utf16(&description_chars) From 09583fb18f679065c44d1dcecf7a6be0b71fda3c Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 03:33:59 +0000 Subject: [PATCH 14/42] extracted that into another PR --- bin/propolis-server/src/lib/initializer.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 0c6dd2a9c..09b32a53b 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1119,13 +1119,6 @@ impl<'a> MachineInitializer<'a> { ) .unwrap(); - // TODO(ixi): extract `generate_smbios` and expect `smbios::TableBytes` as an argument to - // initialize_fwcfg. make `generate_smbios` accept rom size as an explicit parameter. this - // avoids the implicit panic if these are called out of order (and makes it harder to call - // out of order). - // - // one step towards sharing smbios init code between propolis-standalone and - // propolis-server. let smbios::TableBytes { entry_point, structure_table } = self.generate_smbios(); fwcfg From 7d595a335b24b7065c27112c4528cd60f6665e24 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 03:44:22 +0000 Subject: [PATCH 15/42] cleanup on aisle SpecBuilder --- lib/propolis-client/src/instance_spec.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index e1ff0f06e..87cb10e00 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -189,7 +189,8 @@ impl SpecBuilderV0 { /// Permissible to not this if the implicit boot order is desired, but the implicit boot order /// may be unstable across device addition and removal. /// - /// XXX: talk about what happens if names are included that do not name real devices..? + /// If any devices named in this order are not actually present in the constructed spec, + /// Propolis will return an error when the spec is provided. /// /// XXX: this should certainly return `&mut Self` - all the builders here should. check if any /// of these are chained..? @@ -201,11 +202,8 @@ impl SpecBuilderV0 { .into_iter() .map(|name| crate::types::BootDeclaration { name }) .collect(); - eprintln!("setting boot order to {:?}", boot_declarations); - self.spec.devices.boot_order = Some(boot_declarations); - // TODO: would be nice to warn if any of the devices named here are not devices in the spec - // though. + self.spec.devices.boot_order = Some(boot_declarations); Ok(self) } From f53ba2e1a8e163e3a2cedad2997035add18d747e Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 03:47:33 +0000 Subject: [PATCH 16/42] fix phd-tests also --- phd-tests/framework/src/test_vm/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/phd-tests/framework/src/test_vm/mod.rs b/phd-tests/framework/src/test_vm/mod.rs index db88f7d27..fdd14d382 100644 --- a/phd-tests/framework/src/test_vm/mod.rs +++ b/phd-tests/framework/src/test_vm/mod.rs @@ -357,6 +357,7 @@ impl TestVm { disks: disk_reqs.clone().unwrap(), migrate: migrate.clone(), nics: vec![], + boot_order: None, properties: properties.clone(), }; From 16138d1fce0579c0e4c85485ed92536f6b8522bd Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 04:05:06 +0000 Subject: [PATCH 17/42] clippy lints --- bin/propolis-server/src/lib/initializer.rs | 2 +- phd-tests/tests/src/boot_order/efi_utils.rs | 28 ++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 09b32a53b..0b9a1dc5d 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1078,7 +1078,7 @@ impl<'a> MachineInitializer<'a> { }; } else if let Some(vnic_spec) = self.spec.nics.iter().find_map(|(_name, spec)| { - if &spec.backend_name == &boot_entry.name { + if spec.backend_name == boot_entry.name { Some(spec) } else { None diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index 0264140ca..d6dbd87f3 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -15,6 +15,7 @@ use anyhow::{bail, Error}; use byteorder::{LittleEndian, ReadBytesExt}; use phd_testcase::*; use std::collections::HashMap; +use std::fmt::Write; use std::io::{Cursor, Read}; use tracing::{info, warn}; @@ -35,15 +36,15 @@ use super::run_long_command; // EFI Internal Shell: https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 // UiApp: // https://github.com/tianocore/edk2/blob/a445e1a/MdeModulePkg/Application/UiApp/UiApp.inf#L13 -pub(crate) const EDK2_FIRMWARE_VOL_GUID: &'static [u8; 16] = &[ +pub(crate) const EDK2_FIRMWARE_VOL_GUID: &[u8; 16] = &[ 0xc9, 0xbd, 0xb8, 0x7c, 0xeb, 0xf8, 0x34, 0x4f, 0xaa, 0xea, 0x3e, 0xe4, 0xaf, 0x65, 0x16, 0xa1, ]; -pub(crate) const EDK2_UI_APP_GUID: &'static [u8; 16] = &[ +pub(crate) const EDK2_UI_APP_GUID: &[u8; 16] = &[ 0x21, 0xaa, 0x2c, 0x46, 0x14, 0x76, 0x03, 0x45, 0x83, 0x6e, 0x8a, 0xb6, 0xf4, 0x66, 0x23, 0x31, ]; -pub(crate) const EDK2_EFI_SHELL_GUID: &'static [u8; 16] = &[ +pub(crate) const EDK2_EFI_SHELL_GUID: &[u8; 16] = &[ 0x83, 0xa5, 0x04, 0x7c, 0x3e, 0x9e, 0x1c, 0x4f, 0xad, 0x65, 0xe0, 0x52, 0x68, 0xd0, 0xb4, 0xd1, ]; @@ -53,9 +54,9 @@ pub(crate) const EDK2_EFI_SHELL_GUID: &'static [u8; 16] = &[ // `/sys/firmware/efi/efivars/`, are both Linux `efivars`-isms. // // These tests likely will not pass when run with other guest OSes. -pub(crate) const BOOT_CURRENT_VAR: &'static str = +pub(crate) const BOOT_CURRENT_VAR: &str = "BootCurrent-8be4df61-93ca-11d2-aa0d-00e098032b8c"; -pub(crate) const BOOT_ORDER_VAR: &'static str = +pub(crate) const BOOT_ORDER_VAR: &str = "BootOrder-8be4df61-93ca-11d2-aa0d-00e098032b8c"; pub(crate) fn bootvar(num: u16) -> String { @@ -292,7 +293,7 @@ fn unhex(s: &str) -> Vec { res.push(b); } - return res; + res } /// Read the EFI variable `varname` from inside the VM, and return the data therein as a byte @@ -350,13 +351,16 @@ pub(crate) async fn write_efivar( // ``` // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided data. let escaped: String = - new_value.into_iter().map(|b| format!("\\x{:02x}", b)).collect(); + new_value.into_iter().fold(String::new(), |mut out, b| { + write!(out, "\\x{:02x}", b).expect("can append to String"); + out + }); let cmd = format!("printf \"{}\" > {}", escaped, efipath(varname)); let res = run_long_command(vm, &cmd).await?; // If something went sideways and the write failed with something like `invalid argument`... - if res.len() != 0 { + if !res.is_empty() { bail!("writing efi produced unexpected output: {}", res); } @@ -377,14 +381,14 @@ pub(crate) async fn discover_boot_option_numbers( ) -> Result> { let mut option_mappings: HashMap = HashMap::new(); - let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; + let boot_order_bytes = read_efivar(vm, BOOT_ORDER_VAR).await?; info!("Initial boot order var: {:?}", boot_order_bytes); for chunk in boot_order_bytes.chunks(2) { assert_eq!(chunk.len(), 2); let option_num = u16::from_le_bytes(chunk.try_into().unwrap()); - let option_bytes = read_efivar(&vm, &bootvar(option_num)).await?; + let option_bytes = read_efivar(vm, &bootvar(option_num)).await?; let mut cursor = Cursor::new(option_bytes.as_slice()); @@ -460,7 +464,7 @@ pub(crate) async fn remove_boot_entry( vm: &phd_framework::TestVm, boot_option_num: u16, ) -> Result> { - let mut without_option = read_efivar(&vm, BOOT_ORDER_VAR).await?; + let mut without_option = read_efivar(vm, BOOT_ORDER_VAR).await?; let Some(option_idx) = find_option_in_boot_order(&without_option, boot_option_num) @@ -484,7 +488,7 @@ pub(crate) async fn remove_boot_entry( None ); - write_efivar(&vm, BOOT_ORDER_VAR, &without_option).await?; + write_efivar(vm, BOOT_ORDER_VAR, &without_option).await?; Ok(Some(option_idx)) } From 92e98596e82c1609ee673bdc1f4c6899b9ac5a2c Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 04:19:41 +0000 Subject: [PATCH 18/42] include new boot order item in propolis-server-falcon.json too --- openapi/propolis-server-falcon.json | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/openapi/propolis-server-falcon.json b/openapi/propolis-server-falcon.json index acb4a3357..f33bcb49d 100644 --- a/openapi/propolis-server-falcon.json +++ b/openapi/propolis-server-falcon.json @@ -631,6 +631,13 @@ "board": { "$ref": "#/components/schemas/Board" }, + "boot_order": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } + }, "network_devices": { "type": "object", "additionalProperties": { @@ -876,6 +883,13 @@ "InstanceEnsureRequest": { "type": "object", "properties": { + "boot_order": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } + }, "cloud_init_bytes": { "nullable": true, "type": "string" @@ -1533,6 +1547,19 @@ "vcr_matches" ] }, + "BootDeclaration": { + "description": "A boot preference for some device.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, "SerialPort": { "description": "A serial port device.", "type": "object", From 2183773dcfc219df5ba0b2a529e7211a3c8c638a Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 17 Sep 2024 04:52:40 +0000 Subject: [PATCH 19/42] fix formatting in propolis-server.json this is not the `BootOption` lineup that this should land with, but it'll start passing CI again --- .../src/instance_spec/v0.rs | 7 +-- crates/propolis-api-types/src/lib.rs | 2 + openapi/propolis-server-falcon.json | 27 ++++----- openapi/propolis-server.json | 55 ++++++++++--------- 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/crates/propolis-api-types/src/instance_spec/v0.rs b/crates/propolis-api-types/src/instance_spec/v0.rs index 4d50cc7af..add53b1e3 100644 --- a/crates/propolis-api-types/src/instance_spec/v0.rs +++ b/crates/propolis-api-types/src/instance_spec/v0.rs @@ -112,7 +112,7 @@ pub struct DeviceSpecV0 { // same as above #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] - pub boot_order: Option>, + pub boot_order: Option>, #[cfg(feature = "falcon")] pub softnpu_pci_port: Option, @@ -182,11 +182,6 @@ impl DeviceSpecV0 { } } -#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] -pub struct BootDeclaration { - pub name: SpecKey, -} - #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] #[serde(deny_unknown_fields, tag = "type", content = "component")] pub enum StorageBackendV0 { diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 819b7e2a4..173cc5e91 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -388,8 +388,10 @@ pub struct DiskAttachment { pub state: DiskAttachmentState, } +/// A boot preference for some device. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct BootDeclaration { + /// The name of the device to attempt booting from. pub name: String, } diff --git a/openapi/propolis-server-falcon.json b/openapi/propolis-server-falcon.json index f33bcb49d..ab35d1e65 100644 --- a/openapi/propolis-server-falcon.json +++ b/openapi/propolis-server-falcon.json @@ -529,6 +529,19 @@ ], "additionalProperties": false }, + "BootDeclaration": { + "description": "A boot preference for some device.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, "Chipset": { "description": "A kind of virtual chipset.", "oneOf": [ @@ -885,6 +898,7 @@ "properties": { "boot_order": { "nullable": true, + "default": null, "type": "array", "items": { "$ref": "#/components/schemas/BootDeclaration" @@ -1547,19 +1561,6 @@ "vcr_matches" ] }, - "BootDeclaration": { - "description": "A boot preference for some device.", - "type": "object", - "properties": { - "name": { - "description": "The name of the device to attempt booting from.", - "type": "string" - } - }, - "required": [ - "name" - ] - }, "SerialPort": { "description": "A serial port device.", "type": "object", diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index ab4c176dc..c9de8dd0a 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -529,6 +529,19 @@ ], "additionalProperties": false }, + "BootDeclaration": { + "description": "A boot preference for some device.", + "type": "object", + "properties": { + "name": { + "description": "The name of the device to attempt booting from.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, "Chipset": { "description": "A kind of virtual chipset.", "oneOf": [ @@ -631,6 +644,13 @@ "board": { "$ref": "#/components/schemas/Board" }, + "boot_order": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } + }, "network_devices": { "type": "object", "additionalProperties": { @@ -662,13 +682,6 @@ "additionalProperties": { "$ref": "#/components/schemas/StorageDeviceV0" } - }, - "boot_order": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/BootDeclaration" - } } }, "required": [ @@ -852,6 +865,14 @@ "InstanceEnsureRequest": { "type": "object", "properties": { + "boot_order": { + "nullable": true, + "default": null, + "type": "array", + "items": { + "$ref": "#/components/schemas/BootDeclaration" + } + }, "cloud_init_bytes": { "nullable": true, "type": "string" @@ -863,13 +884,6 @@ "$ref": "#/components/schemas/DiskRequest" } }, - "boot_order": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/BootDeclaration" - } - }, "migrate": { "nullable": true, "allOf": [ @@ -1482,19 +1496,6 @@ "vcr_matches" ] }, - "BootDeclaration": { - "description": "A boot preference for some device.", - "type": "object", - "properties": { - "name": { - "description": "The name of the device to attempt booting from.", - "type": "string" - } - }, - "required": [ - "name" - ] - }, "SerialPort": { "description": "A serial port device.", "type": "object", From 05495d5355ba34074cb4577359aff9de0704f505 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 18 Sep 2024 03:43:16 +0000 Subject: [PATCH 20/42] fix bad merge resolution was not pulling boot order through `impl TryFrom for Spec>` --- bin/propolis-server/src/lib/server.rs | 4 +- .../src/lib/spec/api_spec_v0.rs | 6 +++ bin/propolis-server/src/lib/spec/builder.rs | 51 +++++++++---------- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/bin/propolis-server/src/lib/server.rs b/bin/propolis-server/src/lib/server.rs index 34837d7c6..86f858f71 100644 --- a/bin/propolis-server/src/lib/server.rs +++ b/bin/propolis-server/src/lib/server.rs @@ -130,7 +130,9 @@ fn instance_spec_from_request( } if let Some(boot_order) = request.boot_order.as_ref() { - spec_builder.add_boot_order_from_request(boot_order)?; + for item in boot_order.iter() { + spec_builder.add_boot_option(item)?; + } } if let Some(base64) = &request.cloud_init_bytes { diff --git a/bin/propolis-server/src/lib/spec/api_spec_v0.rs b/bin/propolis-server/src/lib/spec/api_spec_v0.rs index a41b5185e..2fc92a1b7 100644 --- a/bin/propolis-server/src/lib/spec/api_spec_v0.rs +++ b/bin/propolis-server/src/lib/spec/api_spec_v0.rs @@ -253,6 +253,12 @@ impl TryFrom for Spec { return Err(ApiSpecError::BackendNotUsed(backend.to_owned())); } + if let Some(boot_order) = value.devices.boot_order.as_ref() { + for item in boot_order.iter() { + builder.add_boot_option(item)?; + } + } + // TODO(#735): Serial ports need to have names like other devices. for serial_port in value.devices.serial_ports.values() { builder.add_serial_port(serial_port.num)?; diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index f542221f6..e2f6cbcc5 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -57,9 +57,6 @@ pub(crate) enum SpecBuilderError { #[error("Boot option {0} is not an attached device")] BootOptionMissing(String), - - #[error("An item named {0} exists but it is not bootable")] - UnbootableItem(String), } #[derive(Debug, Default)] @@ -112,34 +109,34 @@ impl SpecBuilder { Ok(()) } - /// Converts an HTTP API request to add a disk to an instance into - /// device/backend entries in the spec under construction. - pub fn add_boot_order_from_request( + /// Add a boot option to the boot option list of the spec under construction. + pub fn add_boot_option( &mut self, - boot_order: &[BootDeclaration], + item: &BootDeclaration, ) -> Result<(), SpecBuilderError> { - let mut parsed_bootorder = Vec::new(); - - for item in boot_order.iter() { - if !self.spec.disks.contains_key(&item.name) - && !self.spec.nics.contains_key(&item.name) - { - if self.component_names.contains(&item.name) { - return Err(SpecBuilderError::UnbootableItem( - item.name.clone(), - )); - } else { - return Err(SpecBuilderError::BootOptionMissing( - item.name.clone(), - )); - } - } - - parsed_bootorder - .push(crate::spec::BootDeclaration { name: item.name.clone() }); + let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); + + let is_disk = self + .spec + .disks + .values() + .find(|v| v.backend_name.as_str() == item.name) + .is_some(); + + let is_nic = self + .spec + .nics + .values() + .find(|v| v.backend_name.as_str() == item.name) + .is_some(); + + if !is_disk && !is_nic { + return Err(SpecBuilderError::BootOptionMissing(item.name.clone())); } - self.spec.boot_order = Some(parsed_bootorder); + boot_order + .push(crate::spec::BootDeclaration { name: item.name.clone() }); + Ok(()) } From 6a386b0806a9b5dcba6924619ac1afc541bc8715 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 24 Sep 2024 05:45:16 +0000 Subject: [PATCH 21/42] move to more forward-compatible boot settings approach --- bin/propolis-cli/src/main.rs | 6 ++- bin/propolis-server/src/lib/server.rs | 4 +- .../src/lib/spec/api_spec_v0.rs | 4 +- bin/propolis-server/src/lib/spec/builder.rs | 6 +-- bin/propolis-server/src/lib/spec/mod.rs | 4 +- .../src/instance_spec/v0.rs | 4 +- crates/propolis-api-types/src/lib.rs | 11 +++-- lib/propolis-client/src/instance_spec.rs | 8 ++-- lib/propolis-client/src/lib.rs | 2 + openapi/propolis-server.json | 40 +++++++++++++------ phd-tests/framework/src/test_vm/mod.rs | 2 +- 11 files changed, 59 insertions(+), 32 deletions(-) diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index ac4bac942..4cf9ac85d 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -266,7 +266,7 @@ async fn new_instance( // TODO: Allow specifying NICs nics: vec![], disks, - boot_order: None, + boot_settings: None, migrate: None, cloud_init_bytes, }; @@ -518,7 +518,9 @@ async fn migrate_instance( // TODO: Handle migrating NICs nics: vec![], disks, - boot_order: None, + // TODO: Handle retaining boot settings? Or extant boot settings + // forwarded along outside InstanceEnsure anyway. + boot_settings: None, migrate: Some(InstanceMigrateInitiateRequest { migration_id: Uuid::new_v4(), src_addr: src_addr.to_string(), diff --git a/bin/propolis-server/src/lib/server.rs b/bin/propolis-server/src/lib/server.rs index 86f858f71..5e471ef34 100644 --- a/bin/propolis-server/src/lib/server.rs +++ b/bin/propolis-server/src/lib/server.rs @@ -129,8 +129,8 @@ fn instance_spec_from_request( spec_builder.add_disk_from_request(disk)?; } - if let Some(boot_order) = request.boot_order.as_ref() { - for item in boot_order.iter() { + if let Some(boot_settings) = request.boot_settings.as_ref() { + for item in boot_settings.order.iter() { spec_builder.add_boot_option(item)?; } } diff --git a/bin/propolis-server/src/lib/spec/api_spec_v0.rs b/bin/propolis-server/src/lib/spec/api_spec_v0.rs index 2fc92a1b7..1ab1dd95d 100644 --- a/bin/propolis-server/src/lib/spec/api_spec_v0.rs +++ b/bin/propolis-server/src/lib/spec/api_spec_v0.rs @@ -253,8 +253,8 @@ impl TryFrom for Spec { return Err(ApiSpecError::BackendNotUsed(backend.to_owned())); } - if let Some(boot_order) = value.devices.boot_order.as_ref() { - for item in boot_order.iter() { + if let Some(boot_settings) = value.devices.boot_settings.as_ref() { + for item in boot_settings.order.iter() { builder.add_boot_option(item)?; } } diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index e2f6cbcc5..85a626911 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -14,7 +14,7 @@ use propolis_api_types::{ }, PciPath, }, - BootDeclaration, DiskRequest, InstanceProperties, NetworkInterfaceRequest, + BootOrderEntry, DiskRequest, InstanceProperties, NetworkInterfaceRequest, }; use thiserror::Error; @@ -112,7 +112,7 @@ impl SpecBuilder { /// Add a boot option to the boot option list of the spec under construction. pub fn add_boot_option( &mut self, - item: &BootDeclaration, + item: &BootOrderEntry, ) -> Result<(), SpecBuilderError> { let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); @@ -135,7 +135,7 @@ impl SpecBuilder { } boot_order - .push(crate::spec::BootDeclaration { name: item.name.clone() }); + .push(crate::spec::BootOrderEntry { name: item.name.clone() }); Ok(()) } diff --git a/bin/propolis-server/src/lib/spec/mod.rs b/bin/propolis-server/src/lib/spec/mod.rs index c1aad60a7..34ab183d2 100644 --- a/bin/propolis-server/src/lib/spec/mod.rs +++ b/bin/propolis-server/src/lib/spec/mod.rs @@ -57,7 +57,7 @@ pub(crate) struct Spec { pub board: Board, pub disks: HashMap, pub nics: HashMap, - pub boot_order: Option>, + pub boot_order: Option>, pub serial: HashMap, @@ -69,7 +69,7 @@ pub(crate) struct Spec { } #[derive(Clone, Debug, Default)] -pub(crate) struct BootDeclaration { +pub(crate) struct BootOrderEntry { pub name: String, } diff --git a/crates/propolis-api-types/src/instance_spec/v0.rs b/crates/propolis-api-types/src/instance_spec/v0.rs index add53b1e3..69f065482 100644 --- a/crates/propolis-api-types/src/instance_spec/v0.rs +++ b/crates/propolis-api-types/src/instance_spec/v0.rs @@ -109,10 +109,10 @@ pub struct DeviceSpecV0 { #[serde(skip_serializing_if = "Option::is_none")] pub qemu_pvpanic: Option, - // same as above + // Same backwards compatibility reasoning as above. #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] - pub boot_order: Option>, + pub boot_settings: Option, #[cfg(feature = "falcon")] pub softnpu_pci_port: Option, diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 173cc5e91..593a2ec4b 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -53,7 +53,7 @@ pub struct InstanceEnsureRequest { pub disks: Vec, #[serde(default)] - pub boot_order: Option>, + pub boot_settings: Option, pub migrate: Option, @@ -388,9 +388,14 @@ pub struct DiskAttachment { pub state: DiskAttachmentState, } -/// A boot preference for some device. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct BootDeclaration { +pub struct BootSettings { + pub order: Vec, +} + +/// An entry in a list of boot options. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct BootOrderEntry { /// The name of the device to attempt booting from. pub name: String, } diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index 87cb10e00..cb8bbc449 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -198,12 +198,14 @@ impl SpecBuilderV0 { &mut self, boot_order: Vec, ) -> Result<&Self, SpecBuilderError> { - let boot_declarations = boot_order + let boot_order = boot_order .into_iter() - .map(|name| crate::types::BootDeclaration { name }) + .map(|name| crate::types::BootOrderEntry { name }) .collect(); - self.spec.devices.boot_order = Some(boot_declarations); + let settings = crate::types::BootSettings { order: boot_order }; + + self.spec.devices.boot_settings = Some(settings); Ok(self) } diff --git a/lib/propolis-client/src/lib.rs b/lib/propolis-client/src/lib.rs index 3fc2d6f9c..e0c769347 100644 --- a/lib/propolis-client/src/lib.rs +++ b/lib/propolis-client/src/lib.rs @@ -18,6 +18,8 @@ progenitor::generate_api!( // Some Crucible-related bits are re-exported through simulated // sled-agent and thus require JsonSchema + BootOrderEntry = { derives = [schemars::JsonSchema] }, + BootSettings = { derives = [schemars::JsonSchema] }, DiskRequest = { derives = [schemars::JsonSchema] }, VolumeConstructionRequest = { derives = [schemars::JsonSchema] }, CrucibleOpts = { derives = [schemars::JsonSchema] }, diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index c9de8dd0a..41f3fb5a2 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -529,8 +529,8 @@ ], "additionalProperties": false }, - "BootDeclaration": { - "description": "A boot preference for some device.", + "BootOrderEntry": { + "description": "An entry in a list of boot options.", "type": "object", "properties": { "name": { @@ -542,6 +542,20 @@ "name" ] }, + "BootSettings": { + "type": "object", + "properties": { + "order": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BootOrderEntry" + } + } + }, + "required": [ + "order" + ] + }, "Chipset": { "description": "A kind of virtual chipset.", "oneOf": [ @@ -644,12 +658,13 @@ "board": { "$ref": "#/components/schemas/Board" }, - "boot_order": { + "boot_settings": { "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/BootDeclaration" - } + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] }, "network_devices": { "type": "object", @@ -865,13 +880,14 @@ "InstanceEnsureRequest": { "type": "object", "properties": { - "boot_order": { + "boot_settings": { "nullable": true, "default": null, - "type": "array", - "items": { - "$ref": "#/components/schemas/BootDeclaration" - } + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] }, "cloud_init_bytes": { "nullable": true, diff --git a/phd-tests/framework/src/test_vm/mod.rs b/phd-tests/framework/src/test_vm/mod.rs index fdd14d382..9e220fe25 100644 --- a/phd-tests/framework/src/test_vm/mod.rs +++ b/phd-tests/framework/src/test_vm/mod.rs @@ -357,7 +357,7 @@ impl TestVm { disks: disk_reqs.clone().unwrap(), migrate: migrate.clone(), nics: vec![], - boot_order: None, + boot_settings: None, properties: properties.clone(), }; From bcb682450ef362a98b5cb00025b392057dfaa447 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 24 Sep 2024 06:11:47 +0000 Subject: [PATCH 22/42] check if guest environment supports efivar writes, bail early if not --- phd-tests/tests/src/boot_order.rs | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index 751a8c2ea..faf3db3f1 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -270,10 +270,11 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { // If the guest doesn't have an EFI partition then there's no way for boot order preferences to // be persisted. - let mountline = vm.run_shell_command("mount | grep /boot/efi").await?; + let mountline = vm.run_shell_command("mount | grep efivarfs").await?; - if !mountline.contains(" on /boot/efi type vfat") { - warn!("guest doesn't have an EFI partition, cannot manage boot order"); + if !mountline.starts_with("efivarfs on ") { + warn!("guest doesn't have an efivarfs, cannot manage boot order! exiting test WITHOUT VALIDATING ANYTHING"); + return Ok(()); } // Try adding a few new boot options, then add them to the boot order, reboot, and make sure @@ -317,6 +318,26 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { // they are duplicates of. This seems to not get reordered. let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; + // Finally, seeing a read-write `efivarfs` is not sufficient to know that writes to EFI + // variables will actually stick. For example, an Alpine live image backed by an ISO 9660 + // filesystem may have an EFI System Partition and `efivarfs`, but certainly cannot persist + // state and will drop writes to EFI variables. + // + // Check for this condition and exit early if the guest OS configuration will not let us + // perform a useful test. + write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; + let reread = read_efivar(&vm, &bootvar(0xfff0)).await?; + if reread.len() == 0 { + warn!("guest environment drops EFI variable writes! exiting test WITHOUT VALIDATING ANYTHING"); + return Ok(()); + } else { + assert_eq!( + boot_option_bytes, + read_efivar(&vm, &bootvar(0xfff0)).await?, + "EFI variable write wrote something, but not what we expected?" + ); + } + let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; let mut new_boot_order = Vec::new(); From 3200708011fe6a71a2b615e5ea58a6dd43d177f0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 24 Sep 2024 06:22:09 +0000 Subject: [PATCH 23/42] update falcon openapi document too --- openapi/propolis-server-falcon.json | 40 ++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/openapi/propolis-server-falcon.json b/openapi/propolis-server-falcon.json index ab35d1e65..baf6dc95d 100644 --- a/openapi/propolis-server-falcon.json +++ b/openapi/propolis-server-falcon.json @@ -529,8 +529,8 @@ ], "additionalProperties": false }, - "BootDeclaration": { - "description": "A boot preference for some device.", + "BootOrderEntry": { + "description": "An entry in a list of boot options.", "type": "object", "properties": { "name": { @@ -542,6 +542,20 @@ "name" ] }, + "BootSettings": { + "type": "object", + "properties": { + "order": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BootOrderEntry" + } + } + }, + "required": [ + "order" + ] + }, "Chipset": { "description": "A kind of virtual chipset.", "oneOf": [ @@ -644,12 +658,13 @@ "board": { "$ref": "#/components/schemas/Board" }, - "boot_order": { + "boot_settings": { "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/BootDeclaration" - } + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] }, "network_devices": { "type": "object", @@ -896,13 +911,14 @@ "InstanceEnsureRequest": { "type": "object", "properties": { - "boot_order": { + "boot_settings": { "nullable": true, "default": null, - "type": "array", - "items": { - "$ref": "#/components/schemas/BootDeclaration" - } + "allOf": [ + { + "$ref": "#/components/schemas/BootSettings" + } + ] }, "cloud_init_bytes": { "nullable": true, From d4d9b07f25f83ec48fea58293939204401e50ca0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 24 Sep 2024 06:58:00 +0000 Subject: [PATCH 24/42] last eprintln: should be trace tbh --- phd-tests/tests/src/boot_order/efi_utils.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index d6dbd87f3..f587f5e7f 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -17,7 +17,7 @@ use phd_testcase::*; use std::collections::HashMap; use std::fmt::Write; use std::io::{Cursor, Read}; -use tracing::{info, warn}; +use tracing::{trace, info, warn}; use super::run_long_command; @@ -282,7 +282,7 @@ impl EfiLoadOption { fn unhex(s: &str) -> Vec { let s = s.replace("\n", ""); - eprintln!("unhexing {}", s); + trace!("unhexing {}", s); let mut res = Vec::new(); for chunk in s.as_bytes().chunks(2) { assert_eq!(chunk.len(), 2); From 3ad73835cb7fc0c110e0671c32bfce1caae9d75c Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 24 Sep 2024 08:14:00 +0000 Subject: [PATCH 25/42] keep some notes about where backend_name went from --- bin/propolis-server/src/lib/initializer.rs | 4 ++++ bin/propolis-server/src/lib/spec/builder.rs | 3 +++ 2 files changed, 7 insertions(+) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index ba5b0d9a8..70e1a38df 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1039,6 +1039,10 @@ impl<'a> MachineInitializer<'a> { // // realistically we won't be booting from network devices, but leave that as a question // for plumbing on the next layer up. + // + // NOTE: as of #761 the only name that sticks around is the derived name for the backend + // + // maybe we should make this just the provided name? let expected_name = format!("{}-backend", boot_entry.name.as_str()); if let Some(spec) = diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index c1f159fe6..a3e4820c8 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -120,6 +120,9 @@ impl SpecBuilder { ) -> Result<(), SpecBuilderError> { let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); + // NOTE: as of #761 the only name that sticks around is the derived name for the backend + // + // maybe we should make this just the provided name? let expected_name = format!("{}-backend", item.name.as_str()); let is_disk = self From 2e0116998b7450d6a22a6dc2a4934d32201fd8d8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 00:27:01 +0000 Subject: [PATCH 26/42] resolve merge conflict with refactors this drops the untested support from booting from NICs as well --- bin/propolis-server/src/lib/initializer.rs | 28 ++++++++------------- bin/propolis-server/src/lib/spec/builder.rs | 20 +++++++-------- 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 70e1a38df..c3f81bc36 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1035,18 +1035,16 @@ impl<'a> MachineInitializer<'a> { for boot_entry in boot_names.iter() { - // names may refer to a storage device or network device. - // - // realistically we won't be booting from network devices, but leave that as a question - // for plumbing on the next layer up. - // - // NOTE: as of #761 the only name that sticks around is the derived name for the backend - // - // maybe we should make this just the provided name? - let expected_name = format!("{}-backend", boot_entry.name.as_str()); - + // Theoretically we could support booting from network devices by + // matching them here and adding their PCI paths, but exactly what + // would happen is ill-understood and device names are not plumbed + // in a way that supports it. So, only check disks here. if let Some(spec) = - self.spec.disks.iter().find_map(|(name, spec)| if name == &expected_name { Some(spec) } else { None }) + self.spec.disks.iter().find(|(_name, spec)| match &spec.device_spec { + StorageDevice::Nvme(disk) => { disk.backend_name.as_str() == boot_entry.name.as_str() }, + StorageDevice::Virtio(disk) => { disk.backend_name.as_str() == boot_entry.name.as_str() }, + }) + .map(|(_name, spec)| spec) { match &spec.device_spec { StorageDevice::Virtio(disk) => { @@ -1065,15 +1063,9 @@ impl<'a> MachineInitializer<'a> { order.add_nvme(bdf.location, 0); } }; - } else if let Some(vnic_spec) = - self.spec.nics.iter().find_map(|(name, spec)| if name == &expected_name { Some(spec) } else { None }) - { - let bdf = parse_bdf(&vnic_spec.device_spec.pci_path)?; - - order.add_pci(bdf.location, "ethernet"); } else { let message = format!( - "Instance spec included boot entry which does not refer to an existing device: `{}`", + "Instance spec included boot entry which does not refer to an existing disk: `{}`", boot_entry.name.as_str(), ); // TODO(ixi): this is actually duplicative with the top-level `error!` that this diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index a3e4820c8..12635eb2d 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -120,22 +120,20 @@ impl SpecBuilder { ) -> Result<(), SpecBuilderError> { let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); - // NOTE: as of #761 the only name that sticks around is the derived name for the backend - // - // maybe we should make this just the provided name? - let expected_name = format!("{}-backend", item.name.as_str()); + use crate::spec::StorageDevice; + use crate::spec::{NvmeDisk, VirtioDisk}; let is_disk = self .spec .disks - .contains_key(&expected_name); - - let is_nic = self - .spec - .nics - .contains_key(&expected_name); + .values() + .find(|disk| match &disk.device_spec { + StorageDevice::Virtio(VirtioDisk { backend_name, .. }) => backend_name == item.name.as_str(), + StorageDevice::Nvme(NvmeDisk { backend_name, .. }) => backend_name == item.name.as_str(), + }) + .is_some(); - if !is_disk && !is_nic { + if !is_disk { return Err(SpecBuilderError::BootOptionMissing(item.name.clone())); } From 9af0380e462f6955e295bbddff50b55f029eb3c9 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 00:28:44 +0000 Subject: [PATCH 27/42] one less todo --- bin/propolis-server/src/lib/initializer.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index c3f81bc36..857d188ac 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -46,7 +46,7 @@ use propolis::vmm::{self, Builder, Machine}; use propolis_api_types::instance_spec; use propolis_api_types::instance_spec::components::devices::SerialPortNumber; use propolis_api_types::InstanceProperties; -use slog::{error, info}; +use slog::info; /// Arbitrary ROM limit for now const MAX_ROM_SIZE: usize = 0x20_0000; @@ -1068,9 +1068,6 @@ impl<'a> MachineInitializer<'a> { "Instance spec included boot entry which does not refer to an existing disk: `{}`", boot_entry.name.as_str(), ); - // TODO(ixi): this is actually duplicative with the top-level `error!` that this - // unhandled `error` will bubble out to. Maybe just don't log here? - error!(self.log, "{}", message); return Err(Error::new(ErrorKind::Other, message)); } } From 617dd0c870ce382f3208d1b5f42a6eb1c94df5bb Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 00:29:06 +0000 Subject: [PATCH 28/42] settle on an actual approach for the test "boot_disk" helper fn --- phd-tests/framework/src/test_vm/config.rs | 50 +++++++++++++++-------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 90495928e..62dd74d13 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -74,9 +74,8 @@ impl<'dr> VmConfig<'dr> { devices: BTreeMap::new(), }; - config.data_disk( - "boot-disk", - DiskSource::Artifact(guest_artifact), + config.boot_disk( + guest_artifact, DiskInterface::Nvme, DiskBackend::File, 4, @@ -128,6 +127,15 @@ impl<'dr> VmConfig<'dr> { self } + pub fn clear_boot_order(&mut self) -> &mut Self { + self.boot_order = None; + self + } + + /// Add a new disk to the VM config, and add it to the front of the VM's boot order. + /// + /// The added disk will have the name `boot-disk`, and replace the previous existing + /// `boot-disk`. pub fn boot_disk( &mut self, artifact: &'dr str, @@ -135,22 +143,24 @@ impl<'dr> VmConfig<'dr> { backend: DiskBackend, pci_device_num: u8, ) -> &mut Self { - // If you're calling `boot_disk` directly, you're probably not specifying an explicit boot - // order. the _implicit_ boot order seems to be the order disks are created in, so preserve - // that implied behavior by having a boot disk inserted up front. - // - // XXX: i thought that the implicit order would be *pci device order*, not disk creation - // order? - // XXX: figure out what to do if there is both an explicit boot order and a call to - // `.boot_disk`. - - self.disks[0] = DiskRequest { - name: "boot-disk", + let boot_order = self.boot_order.get_or_insert(Vec::new()); + if let Some(prev_boot_item) = boot_order.iter().position(|d| *d == "boot-disk") { + boot_order.remove(prev_boot_item); + } + + if let Some(prev_boot_disk) = self.disks.iter().position(|d| d.name == "boot-disk") { + self.disks.remove(prev_boot_disk); + } + + boot_order.insert(0, "boot-disk"); + + self.data_disk( + "boot-disk", + DiskSource::Artifact(artifact), interface, backend, - source: DiskSource::Artifact(artifact), pci_device_num, - }; + ); self } @@ -193,9 +203,13 @@ impl<'dr> VmConfig<'dr> { }) .context("serializing Propolis server config")?; - let boot_disk = &self.disks[0]; + let boot_disk_name = self.boot_order.as_ref().map(|order| order[0]).unwrap_or("boot-disk"); + let boot_disk = self.disks.iter().find(|d| d.name == boot_disk_name) + .or_else(|| self.disks.get(0)) + .expect("VM config includes at least one disk (and maybe a boot order)?"); - // TODO: adapt this to check all listed boot devices + // XXX: assuming all bootable images are equivalent to the first, or at least the same + // guest OS kind. let DiskSource::Artifact(boot_artifact) = boot_disk.source else { unreachable!("boot disks always use artifacts as sources"); }; From fcfa0a2397accad9ee6901734fc451b444fc6bb5 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 01:56:47 +0000 Subject: [PATCH 29/42] ok, keep all tests passing with framework changes --- phd-tests/framework/src/test_vm/config.rs | 23 +++++++++++++++++++---- phd-tests/tests/src/boot_order.rs | 4 ++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 62dd74d13..6dfb07b17 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -203,10 +203,25 @@ impl<'dr> VmConfig<'dr> { }) .context("serializing Propolis server config")?; - let boot_disk_name = self.boot_order.as_ref().map(|order| order[0]).unwrap_or("boot-disk"); - let boot_disk = self.disks.iter().find(|d| d.name == boot_disk_name) - .or_else(|| self.disks.get(0)) - .expect("VM config includes at least one disk (and maybe a boot order)?"); + // The first disk in the boot list might not be the disk a test *actually* expects to boot. + // + // If there are multiple bootable disks in the boot order, we'll assume they're all + // the same guest OS kind. So look for `boot-disk` - if there isn't a disk named + // `boot-disk` then fall back to hoping the first disk in the boot orrder is a bootable + // disk, and if *that* isn't a bootable disk, maybe the first disk is. + // + // TODO: theoretically we might want to accept configuration of a specific guest OS adapter + // and avoid the guessing games. So far the above supports existing tests and makes them + // "Just Work", but a more complicated test may want more control here. + let boot_disk = + self.disks.iter().find(|d| d.name == "boot-disk") + .or_else(|| if let Some(boot_order) = self.boot_order.as_ref() { + boot_order.get(0).and_then(|name| self.disks.iter().find(|d| &d.name == name)) + } else { + None + }) + .or_else(|| self.disks.get(0)) + .expect("VM config includes at least one disk (and maybe a boot order)?"); // XXX: assuming all bootable images are equivalent to the first, or at least the same // guest OS kind. diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index faf3db3f1..e9b84270b 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -415,6 +415,10 @@ async fn boot_order_source_priority(ctx: &Framework) { 20, ); + // For the first stage of this test, we want to leave the boot procedure up to whatever the + // guest firmware will do. + cfg.clear_boot_order(); + let mut vm_no_bootorder = ctx.spawn_vm(&cfg, None).await?; vm_no_bootorder.launch().await?; vm_no_bootorder.wait_to_boot().await?; From 247f1e3b5c20bd9e723d9a15150bb3bad18a46c0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 01:57:24 +0000 Subject: [PATCH 30/42] rustfmt --- bin/propolis-server/src/lib/initializer.rs | 16 +++++++++++----- bin/propolis-server/src/lib/spec/builder.rs | 8 ++++++-- phd-tests/framework/src/test_vm/config.rs | 8 ++++++-- phd-tests/tests/src/boot_order/efi_utils.rs | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 857d188ac..3db41f3c5 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1033,16 +1033,22 @@ impl<'a> MachineInitializer<'a> { bdf }; - for boot_entry in boot_names.iter() { // Theoretically we could support booting from network devices by // matching them here and adding their PCI paths, but exactly what // would happen is ill-understood and device names are not plumbed // in a way that supports it. So, only check disks here. - if let Some(spec) = - self.spec.disks.iter().find(|(_name, spec)| match &spec.device_spec { - StorageDevice::Nvme(disk) => { disk.backend_name.as_str() == boot_entry.name.as_str() }, - StorageDevice::Virtio(disk) => { disk.backend_name.as_str() == boot_entry.name.as_str() }, + if let Some(spec) = self + .spec + .disks + .iter() + .find(|(_name, spec)| match &spec.device_spec { + StorageDevice::Nvme(disk) => { + disk.backend_name.as_str() == boot_entry.name.as_str() + } + StorageDevice::Virtio(disk) => { + disk.backend_name.as_str() == boot_entry.name.as_str() + } }) .map(|(_name, spec)| spec) { diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index 12635eb2d..85d2dbc8b 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -128,8 +128,12 @@ impl SpecBuilder { .disks .values() .find(|disk| match &disk.device_spec { - StorageDevice::Virtio(VirtioDisk { backend_name, .. }) => backend_name == item.name.as_str(), - StorageDevice::Nvme(NvmeDisk { backend_name, .. }) => backend_name == item.name.as_str(), + StorageDevice::Virtio(VirtioDisk { backend_name, .. }) => { + backend_name == item.name.as_str() + } + StorageDevice::Nvme(NvmeDisk { backend_name, .. }) => { + backend_name == item.name.as_str() + } }) .is_some(); diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 6dfb07b17..2bc9fa19e 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -144,11 +144,15 @@ impl<'dr> VmConfig<'dr> { pci_device_num: u8, ) -> &mut Self { let boot_order = self.boot_order.get_or_insert(Vec::new()); - if let Some(prev_boot_item) = boot_order.iter().position(|d| *d == "boot-disk") { + if let Some(prev_boot_item) = + boot_order.iter().position(|d| *d == "boot-disk") + { boot_order.remove(prev_boot_item); } - if let Some(prev_boot_disk) = self.disks.iter().position(|d| d.name == "boot-disk") { + if let Some(prev_boot_disk) = + self.disks.iter().position(|d| d.name == "boot-disk") + { self.disks.remove(prev_boot_disk); } diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index f587f5e7f..a26a8e3de 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -17,7 +17,7 @@ use phd_testcase::*; use std::collections::HashMap; use std::fmt::Write; use std::io::{Cursor, Read}; -use tracing::{trace, info, warn}; +use tracing::{info, trace, warn}; use super::run_long_command; From 52aa0fe71c354be5c12b18f39db0863aa3dc8b16 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 02:16:41 +0000 Subject: [PATCH 31/42] speed limit 80 --- phd-tests/tests/src/boot_order.rs | 242 +++++++++++--------- phd-tests/tests/src/boot_order/efi_utils.rs | 114 +++++---- 2 files changed, 206 insertions(+), 150 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index e9b84270b..7f46c7be3 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -24,15 +24,17 @@ pub(crate) async fn run_long_command( vm: &phd_framework::TestVm, cmd: &str, ) -> Result { - // Ok, this is a bit whacky: something about the line wrapping for long commands causes - // `run_shell_command` to hang instead of ever actually seeing a response prompt. + // Ok, this is a bit whacky: something about the line wrapping for long + // commands causes `run_shell_command` to hang instead of ever actually + // seeing a response prompt. // - // I haven't gone and debugged that; instead, chunk the input command up into segments short - // enough to not wrap when input, put them all in a file, then run the file. + // I haven't gone and debugged that; instead, chunk the input command up + // into segments short enough to not wrap when input, put them all in a + // file, then run the file. vm.run_shell_command("rm cmd").await?; let mut offset = 0; - // Escape any internal `\`. This isn't comprehensive escaping (doesn't handle \n, for - // example).. + // Escape any internal `\`. This isn't comprehensive escaping (doesn't + // handle \n, for example).. let cmd = cmd.replace("\\", "\\\\"); while offset < cmd.len() { let lim = std::cmp::min(cmd.len() - offset, 50); @@ -49,37 +51,42 @@ pub(crate) async fn run_long_command( vm.run_shell_command(". cmd").await } -// This test checks that with a specified boot order, the guest boots whichever disk we wanted to -// come first. This is simple enough, until you want to know "what you booted from".. +// This test checks that with a specified boot order, the guest boots whichever +// disk we wanted to come first. This is simple enough, until you want to know +// "what you booted from".. // -// For live CDs, such as Alpine's, the system boots into a tmpfs loaded from a boot disk, but -// there's no clear line to what disk that live image *came from*. If you had two Alpine 3.20.3 -// images attached to one VM, you'd ceretainly boot into Alpine 3.20.3, but I don't see a way to -// tell *which disk* that Alpine would be sourced from, from Alpine alone. +// For live CDs, such as Alpine's, the system boots into a tmpfs loaded from a +// boot disk, but there's no clear line to what disk that live image *came +// from*. If you had two Alpine 3.20.3 images attached to one VM, you'd +// ceretainly boot into Alpine 3.20.3, but I don't see a way to tell *which +// disk* that Alpine would be sourced from, from Alpine alone. // -// So instead, check EFI variables. To do this, then, we have to.. parse EFI variables. That is -// what this test does below, but it's probably not fully robust to what we might do with PCI -// devices in the future. +// So instead, check EFI variables. To do this, then, we have to.. parse EFI +// variables. That is what this test does below, but it's probably not fully +// robust to what we might do with PCI devices in the future. // -// A more "future-proof" setup might be to just boot an OS, see that we ended up in the OS we -// expected, and check some attribute about it like that the kernel version is what we expected the -// booted OS to be. That's still a good fallback if we discover that parsing EFI variables is -// difficult to stick with for any reason. It has a downside though: we'd have to keep a specific -// image around with a specific kernel version as either the "we expect to boot into this" image -// or the "we expected to boot into not this" cases. +// A more "future-proof" setup might be to just boot an OS, see that we ended up +// in the OS we expected, and check some attribute about it like that the kernel +// version is what we expected the booted OS to be. That's still a good fallback +// if we discover that parsing EFI variables is difficult to stick with for any +// reason. It has a downside though: we'd have to keep a specific image around +// with a specific kernel version as either the "we expect to boot into this" +// image or the "we expected to boot into not this" cases. // -// The simplest case: show that we can configure the guest's boot order from outside the machine. -// This is the most likely common case, where Propolis is told what the boot order should be by -// Nexus and we simply make it happen. +// The simplest case: show that we can configure the guest's boot order from +// outside the machine. This is the most likely common case, where Propolis is +// told what the boot order should be by Nexus and we simply make it happen. // -// Unlike later tests, this test does not manipulate boot configuration from inside the guest OS. +// Unlike later tests, this test does not manipulate boot configuration from +// inside the guest OS. #[phd_testcase] async fn configurable_boot_order(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("configurable_boot_order"); - // Create a second disk backed by the same artifact as the default `boot-disk`. This way we'll - // boot to the same environment regardless of which disk is used; we'll check EFI variables to - // figure out if the right disk was booted. + // Create a second disk backed by the same artifact as the default + // `boot-disk`. This way we'll boot to the same environment regardless of + // which disk is used; we'll check EFI variables to figure out if the right + // disk was booted. cfg.data_disk( "alt-boot", DiskSource::Artifact(ctx.default_guest_os_artifact()), @@ -88,8 +95,8 @@ async fn configurable_boot_order(ctx: &Framework) { 24, ); - // We haven't specified a boot order. So, we'll expect that we boot to the lower-numbered PCI - // device (4) and end up in Alpine 3.20. + // We haven't specified a boot order. So, we'll expect that we boot to the + // lower-numbered PCI device (4) and end up in Alpine 3.20. let mut vm = ctx.spawn_vm(&cfg, None).await?; vm.launch().await?; vm.wait_to_boot().await?; @@ -106,8 +113,8 @@ async fn configurable_boot_order(ctx: &Framework) { assert!(load_option.path.matches_pci_device_function(4, 0)); - // Now specify a boot order and do the whole thing again. Note that this order puts the later - // PCI device first, so this changes the boot order! + // Now specify a boot order and do the whole thing again. Note that this + // order puts the later PCI device first, so this changes the boot order! cfg.boot_order(vec!["alt-boot", "boot-disk"]); let mut vm = ctx.spawn_vm(&cfg, None).await?; @@ -124,15 +131,17 @@ async fn configurable_boot_order(ctx: &Framework) { let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); - // If we were going to test the PCI bus number too, we'd check the AHCI Device Path entry that - // precedes these PCI values. But we only use PCI bus 0 today, and the mapping from an AHCI - // Device Path to a PCI root is not immediately obvious? + // If we were going to test the PCI bus number too, we'd check the AHCI + // Device Path entry that precedes these PCI values. But we only use PCI bus + // 0 today, and the mapping from an AHCI Device Path to a PCI root is not + // immediately obvious? assert!(load_option.path.matches_pci_device_function(24, 0)); } -// This is very similar to the `in_memory_backend_smoke_test` test, but specifically asserts that -// the unbootable disk is first in the boot order; the system booting means that boot order is -// respected and a non-bootable disk does not wedge startup. +// This is very similar to the `in_memory_backend_smoke_test` test, but +// specifically asserts that the unbootable disk is first in the boot order; the +// system booting means that boot order is respected and a non-bootable disk +// does not wedge startup. #[phd_testcase] async fn unbootable_disk_skipped(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("unbootable_disk_skipped"); @@ -145,9 +154,11 @@ async fn unbootable_disk_skipped(ctx: &Framework) { 16, ); - // `boot-disk` is the implicitly-created boot disk made from the default guest OS artifact. + // `boot-disk` is the implicitly-created boot disk made from the default + // guest OS artifact. // - // explicitly boot from it later, so OVMF has to try and fail to boot `unbootable`. + // explicitly boot from it later, so OVMF has to try and fail to boot + // `unbootable`. cfg.boot_order(vec!["unbootable", "boot-disk"]); let mut vm = ctx.spawn_vm(&cfg, None).await?; @@ -164,18 +175,21 @@ async fn unbootable_disk_skipped(ctx: &Framework) { let load_option = EfiLoadOption::parse_from(&mut cursor).unwrap(); - // Device 4 is the implicitly-used `boot-disk` PCI device number. This is not 16, for example, - // as we expect to not boot `unbootable`. + // Device 4 is the implicitly-used `boot-disk` PCI device number. This is + // not 16, for example, as we expect to not boot `unbootable`. assert_eq!(load_option.pci_device_function(), (4, 0)); let boot_order_bytes = read_efivar(&vm, BOOT_ORDER_VAR).await?; - // Interestingly, when we specify a boot order via fwcfg, OVMF includes two additional entries: + // Interestingly, when we specify a boot order via fwcfg, OVMF includes two + // additional entries: // * "UiApp", which I can't find much about - // * "EFI Internal Shell", the EFI shell the system drops into if no disks are bootable + // * "EFI Internal Shell", the EFI shell the system drops into if no disks + // are bootable // - // Exactly where these end up in the boot order is not entirely important; we really just need - // to make sure that the boot order we specified comes first (and before "EFI Internal Shell") + // Exactly where these end up in the boot order is not entirely important; + // we really just need to make sure that the boot order we specified comes + // first (and before "EFI Internal Shell") #[derive(Debug, PartialEq, Eq)] enum TestState { SeekingUnbootable, @@ -247,9 +261,9 @@ async fn unbootable_disk_skipped(ctx: &Framework) { assert_eq!(state, TestState::AfterBootOrder); } -// Start with the boot order being `["boot-disk", "unbootable"]`, then change it so that next boot -// we'll boot from `unbootable` first. Then reboot and verify that the boot order is still -// "boot-disk" first. +// Start with the boot order being `["boot-disk", "unbootable"]`, then change it +// so that next boot we'll boot from `unbootable` first. Then reboot and verify +// that the boot order is still "boot-disk" first. #[phd_testcase] async fn guest_can_adjust_boot_order(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("guest_can_adjust_boot_order"); @@ -268,22 +282,28 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { vm.launch().await?; vm.wait_to_boot().await?; - // If the guest doesn't have an EFI partition then there's no way for boot order preferences to - // be persisted. + // If the guest doesn't have an EFI partition then there's no way for boot + // order preferences to be persisted. let mountline = vm.run_shell_command("mount | grep efivarfs").await?; if !mountline.starts_with("efivarfs on ") { - warn!("guest doesn't have an efivarfs, cannot manage boot order! exiting test WITHOUT VALIDATING ANYTHING"); + warn!( + "guest doesn't have an efivarfs, cannot manage boot order! \ + exiting test WITHOUT VALIDATING ANYTHING" + ); return Ok(()); } - // Try adding a few new boot options, then add them to the boot order, reboot, and make sure - // they're all as we set them. + // Try adding a few new boot options, then add them to the boot order, + // reboot, and make sure they're all as we set them. if !run_long_command(&vm, &format!("ls {}", efipath(&bootvar(0xffff)))) .await? .is_empty() { - warn!("guest environment already has a BootFFFF entry; is this not a fresh image?"); + warn!( + "guest environment already has a BootFFFF entry; \ + is this not a fresh image?" + ); } let boot_num: u16 = { @@ -291,44 +311,52 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { u16::from_le_bytes(bytes.try_into().unwrap()) }; - // The entry we booted from is clearly valid, so we should be able to insert a few duplicate - // entries. We won't boot into them, but if something bogus happens and we did boot one of - // these, at least it'll work and we can detect the misbehavior. + // The entry we booted from is clearly valid, so we should be able to insert + // a few duplicate entries. We won't boot into them, but if something bogus + // happens and we did boot one of these, at least it'll work and we can + // detect the misbehavior. // - // But here's a weird one: if we just append these to the end, on reboot they'll be moved - // somewhat up the boot order. This occurrs both if setting variables through `efibootmgr` or - // by writing to /sys/firmware/efi/efivars/BootOrder-* directly. As an example, say we had a - // boot order of "0004,0001,0003,0000" where boot options were as follows: + // But here's a weird one: if we just append these to the end, on reboot + // they'll be moved somewhat up the boot order. This occurrs both if setting + // variables through `efibootmgr` or by writing to + // /sys/firmware/efi/efivars/BootOrder-* directly. As an example, say we had + // a boot order of "0004,0001,0003,0000" where boot options were as follows: // * 0000: UiApp // * 0001: PCI device 4, function 0 // * 0003: EFI shell (Firmware volume+file) // * 0004: Ubuntu (HD partition 15, GPT formatted) // - // If we duplicate entry 4 to new options FFF0 and FFFF, reset the boot order to - // "0004,0001,0003,0000,FFF0,FFFF", then reboot the VM, the boot order when it comes back - // up will be "0004,0001,FFF0,FFFF,0003,0000". + // If we duplicate entry 4 to new options FFF0 and FFFF, reset the boot + // order to "0004,0001,0003,0000,FFF0,FFFF", then reboot the VM, the boot + // order when it comes back up will be "0004,0001,FFF0,FFFF,0003,0000". // - // This almost makes sense, but with other devices in the mix I've seen reorderings like - // `0004,0001,,0003,0000,FFF0,FFFF` turning into - // `0004,0001,FFF0,FFFF,,0003,0000`. This is particularly strange in that the new - // options were reordered around some other PCI device. It's not the boot order we set! + // This almost makes sense, but with other devices in the mix I've seen + // reorderings like `0004,0001,,0003,0000,FFF0,FFFF` turning into + // `0004,0001,FFF0,FFFF,,0003,0000`. This is particularly strange + // in that the new options were reordered around some other PCI device. It's + // not the boot order we set! // - // So, to at least confirm we *can* modify the boot order in a stable way, make a somewhat less - // ambitious change: insert the duplicate boot options in the order directly after the option - // they are duplicates of. This seems to not get reordered. + // So, to at least confirm we *can* modify the boot order in a stable way, + // make a somewhat less ambitious change: insert the duplicate boot options + // in the order directly after the option they are duplicates of. This seems + // to not get reordered. let boot_option_bytes = read_efivar(&vm, &bootvar(boot_num)).await?; - // Finally, seeing a read-write `efivarfs` is not sufficient to know that writes to EFI - // variables will actually stick. For example, an Alpine live image backed by an ISO 9660 - // filesystem may have an EFI System Partition and `efivarfs`, but certainly cannot persist - // state and will drop writes to EFI variables. + // Finally, seeing a read-write `efivarfs` is not sufficient to know that + // writes to EFI variables will actually stick. For example, an Alpine live + // image backed by an ISO 9660 filesystem may have an EFI System Partition + // and `efivarfs`, but certainly cannot persist state and will drop writes + // to EFI variables. // - // Check for this condition and exit early if the guest OS configuration will not let us - // perform a useful test. + // Check for this condition and exit early if the guest OS configuration + // will not let us perform a useful test. write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; let reread = read_efivar(&vm, &bootvar(0xfff0)).await?; if reread.len() == 0 { - warn!("guest environment drops EFI variable writes! exiting test WITHOUT VALIDATING ANYTHING"); + warn!( + "guest environment drops EFI variable writes! \ + exiting test WITHOUT VALIDATING ANYTHING" + ); return Ok(()); } else { assert_eq!( @@ -382,19 +410,22 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { assert_eq!(boot_option_bytes, boot_option_bytes_after_reboot); } -// This test is less demonstrating specific desired behavior, and more the observed behavior of -// OVMF with configuration we can offer today. If Propolis or other changes break this test, the -// test may well be what needs changing. +// This test is less demonstrating specific desired behavior, and more the +// observed behavior of OVMF with configuration we can offer today. If Propolis +// or other changes break this test, the test may well be what needs changing. // -// If a `bootorder` file is present in fwcfg, there two relevant consequences demonstrated here: -// * The order of devices in `bootorder` is the order that will be used; on reboot any persisted -// configuration will be replaced with one derived from `bootorder` and corresponding OVMF logic. -// * Guests cannot meaningfully change boot order. If an entry is in `bootorder`, that determines -// its' order. If it is not in `bootorder` but is retained for booting, it is appended to the end -// of the boot order in what seems to be the order that OVMF discovers the device. +// If a `bootorder` file is present in fwcfg, there two relevant consequences +// demonstrated here: * The order of devices in `bootorder` is the order that +// will be used; on reboot any persisted configuration will be replaced with one +// derived from `bootorder` and corresponding OVMF logic. * Guests cannot +// meaningfully change boot order. If an entry is in `bootorder`, that +// determines its' order. If it is not in `bootorder` but is retained for +// booting, it is appended to the end of the boot order in what seems to be the +// order that OVMF discovers the device. // -// If `bootorder` is removed for subsequent reboots, the EFI System Partition's store of NvVar -// variables is the source of boot order, and guests can control their boot fates. +// If `bootorder` is removed for subsequent reboots, the EFI System Partition's +// store of NvVar variables is the source of boot order, and guests can control +// their boot fates. #[phd_testcase] async fn boot_order_source_priority(ctx: &Framework) { let mut cfg = ctx.vm_config_builder("boot_order_source_priority"); @@ -415,16 +446,16 @@ async fn boot_order_source_priority(ctx: &Framework) { 20, ); - // For the first stage of this test, we want to leave the boot procedure up to whatever the - // guest firmware will do. + // For the first stage of this test, we want to leave the boot procedure up + // to whatever the guest firmware will do. cfg.clear_boot_order(); let mut vm_no_bootorder = ctx.spawn_vm(&cfg, None).await?; vm_no_bootorder.launch().await?; vm_no_bootorder.wait_to_boot().await?; - // If the guest doesn't have an EFI partition then there's no way for boot order preferences to - // be persisted. + // If the guest doesn't have an EFI partition then there's no way for boot + // order preferences to be persisted. let mountline = vm_no_bootorder.run_shell_command("mount | grep /boot/efi").await?; @@ -442,8 +473,9 @@ async fn boot_order_source_priority(ctx: &Framework) { ) .await?; - // `unbootable` should be somewhere in the middle of the boot order: definitely between - // `boot-disk` and `unbootable-2`, for the options enumerated from PCI devices. + // `unbootable` should be somewhere in the middle of the boot order: + // definitely between `boot-disk` and `unbootable-2`, for the options + // enumerated from PCI devices. let unbootable_num = boot_option_numbers["unbootable"]; let unbootable_idx = remove_boot_entry(&vm_no_bootorder, unbootable_num) @@ -455,9 +487,10 @@ async fn boot_order_source_priority(ctx: &Framework) { let reloaded_order = read_efivar(&vm_no_bootorder, BOOT_ORDER_VAR).await?; - // Somewhat unexpected, but where OVMF gets us: `unbootable` is back in the boot order, but at - // the end of the list. One might hope it would be entirely removed from the boot order now, - // but no such luck. The good news is that we can in fact influence the boot order. + // Somewhat unexpected, but where OVMF gets us: `unbootable` is back in the + // boot order, but at the end of the list. One might hope it would be + // entirely removed from the boot order now, but no such luck. The good news + // is that we can in fact influence the boot order. let unbootable_idx_after_reboot = find_option_in_boot_order(&reloaded_order, unbootable_num) .expect("unbootable is back in the order"); @@ -465,11 +498,12 @@ async fn boot_order_source_priority(ctx: &Framework) { let last_boot_option = &reloaded_order[reloaded_order.len() - 2..]; assert_eq!(last_boot_option, &unbootable_num.to_le_bytes()); - // But this new position for `unbootable` definitely should be different from before. + // But this new position for `unbootable` definitely should be different + // from before. assert_ne!(unbootable_idx, unbootable_idx_after_reboot); - // And if we do the whole dance again with an explicit boot order provided to the guest, we'll - // get different results! + // And if we do the whole dance again with an explicit boot order provided + // to the guest, we'll get different results! drop(vm_no_bootorder); cfg.boot_order(vec!["boot-disk", "unbootable", "unbootable-2"]); @@ -499,8 +533,8 @@ async fn boot_order_source_priority(ctx: &Framework) { let reloaded_order = read_efivar(&vm, BOOT_ORDER_VAR).await?; - // The option will be back in the boot order, where it was before! This is because fwcfg still - // has a `bootorder` file. + // The option will be back in the boot order, where it was before! This is + // because fwcfg still has a `bootorder` file. assert_eq!( find_option_in_boot_order(&reloaded_order, unbootable_num), Some(unbootable_idx) diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index a26a8e3de..8f979c3c5 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -4,10 +4,11 @@ //! EFI variable parsing and manipulation utilities. //! -//! Conceptually, this would be a separate crate. Something like `uefi`, or maybe more accurately, -//! `uefi-raw`. Those crates are very oriented towards *being* the platform firmware though - it's -//! not clear how to use them to parse a boot option into a device path, for example, though they -//! clearly are able to support processing device paths. +//! Conceptually, this would be a separate crate. Something like `uefi`, or +//! maybe more accurately, `uefi-raw`. Those crates are very oriented towards +//! *being* the platform firmware though - it's not clear how to use them to +//! parse a boot option into a device path, for example, though they clearly are +//! able to support processing device paths. //! //! So instead, this is enough supporting logic for our tests in Propolis. @@ -21,19 +22,23 @@ use tracing::{info, trace, warn}; use super::run_long_command; -// First, some GUIDs. These GUIDs come from EDK2, and OVMF reuses them. Notably these are the raw -// bytes of the GUID: textual values will have slightly different ordering of bytes. +// First, some GUIDs. These GUIDs come from EDK2, and OVMF reuses them. Notably +// these are the raw bytes of the GUID: textual values will have slightly +// different ordering of bytes. // -// Some source references, as you won't find these GUIDs in a UEFI or related spec document.. The -// firmware volume is identified by what seems to be the DXE firmware volume: +// Some source references, as you won't find these GUIDs in a UEFI or related +// spec document.. The firmware volume is identified by what seems to be the DXE +// firmware volume: // https://github.com/tianocore/edk2/blob/712797c/OvmfPkg/OvmfPkgIa32.fdf#L181 // introduced in -// https://github.com/tianocore/edk2/commit/16f26de663967b5a64140b6abba2c145ea50194c, note this -// is the DXEFV entry. +// https://github.com/tianocore/edk2/commit/16f26de663967b5a64140b6abba2c145ea50194c, +// note this is the DXEFV entry. // -// The *files* we'll care about in this test are identified by other GUIDs in the above *volume*. +// The *files* we'll care about in this test are identified by other GUIDs in +// the above *volume*. // -// EFI Internal Shell: https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 +// EFI Internal Shell: +// https://github.com/tianocore/edk2/blob/a445e1a/ShellPkg/ShellPkg.dec#L59-L60 // UiApp: // https://github.com/tianocore/edk2/blob/a445e1a/MdeModulePkg/Application/UiApp/UiApp.inf#L13 pub(crate) const EDK2_FIRMWARE_VOL_GUID: &[u8; 16] = &[ @@ -49,9 +54,10 @@ pub(crate) const EDK2_EFI_SHELL_GUID: &[u8; 16] = &[ 0x68, 0xd0, 0xb4, 0xd1, ]; -// The variable namespace `8be4df61-93ca-11d2-aa0d-00e098032b8c` comes from UEFI, as do the -// variable names here. The presentation as `{varname}-{namespace}`, and at a path like -// `/sys/firmware/efi/efivars/`, are both Linux `efivars`-isms. +// The variable namespace `8be4df61-93ca-11d2-aa0d-00e098032b8c` comes from +// UEFI, as do the variable names here. The presentation as +// `{varname}-{namespace}`, and at a path like `/sys/firmware/efi/efivars/`, are +// both Linux `efivars`-isms. // // These tests likely will not pass when run with other guest OSes. pub(crate) const BOOT_CURRENT_VAR: &str = @@ -126,15 +132,17 @@ impl EfiLoadPath { } } +// The `Acpi` fields are not explicitly used (yet), but are useful for `Debug` +// purposes. #[allow(dead_code)] -// The `Acpi` fields are not explicitly used (yet), but are useful for `Debug` purposes. #[derive(Debug, Clone, Copy)] pub(crate) enum DevicePath { Acpi { hid: u32, uid: u32 }, Pci { device: u8, function: u8 }, - // These two are described in sections 8.2 and 8.3 of the UEFI PI spec, respectively. - // Version 1.6 can be found at https://uefi.org/sites/default/files/resources/PI_Spec_1.6.pdf + // These two are described in sections 8.2 and 8.3 of the UEFI PI spec, + // respectively. Version 1.6 can be found at + // https://uefi.org/sites/default/files/resources/PI_Spec_1.6.pdf FirmwareVolume { guid: [u8; 16] }, FirmwareFile { guid: [u8; 16] }, } @@ -238,7 +246,11 @@ impl EfiLoadOption { DevicePath::parse_from(&mut device_path_cursor) .expect("can read device path element"); if !matches!(pci_device, DevicePath::Pci { .. }) { - bail!("expected ACPI Device Path entry to be followed by a PCI Device Path, but was {:?}", pci_device); + bail!( + "expected ACPI Device Path entry to be followed by \ + a PCI Device Path, but was {:?}", + pci_device + ); } EfiLoadPath::Device { acpi_root, pci_device } @@ -247,7 +259,11 @@ impl EfiLoadOption { let file = DevicePath::parse_from(&mut device_path_cursor) .expect("can read device path element"); if !matches!(file, DevicePath::FirmwareFile { .. }) { - bail!("expected Firmware Volume entry to be followed by a Firmware File, but was {:?}", file); + bail!( + "expected Firmware Volume entry to be followed by \ + a Firmware File, but was {:?}", + file + ); } EfiLoadPath::FirmwareFile { volume, file } @@ -257,9 +273,9 @@ impl EfiLoadOption { } }; - // Not strictly necessary, but advance `bytes` by the number of bytes we read from - // `device_path_cursor`. To callers, this keeps it as if we had just been reading `bytes` - // all along. + // Not strictly necessary, but advance `bytes` by the number of bytes we + // read from `device_path_cursor`. To callers, this keeps it as if we + // had just been reading `bytes` all along. bytes.set_position(bytes.position() + device_path_cursor.position()); Ok(EfiLoadOption { description, path: load_path }) @@ -296,8 +312,8 @@ fn unhex(s: &str) -> Vec { res } -/// Read the EFI variable `varname` from inside the VM, and return the data therein as a byte -/// array. +/// Read the EFI variable `varname` from inside the VM, and return the data +/// therein as a byte array. pub(crate) async fn read_efivar( vm: &phd_framework::TestVm, varname: &str, @@ -315,8 +331,8 @@ pub(crate) async fn read_efivar( /// Write the provided bytes to the EFI variable `varname`. /// -/// For Linux guests: variables automatically have their prior attributes prepended. Provide only -/// the variable's data. +/// For Linux guests: variables automatically have their prior attributes +/// prepended. Provide only the variable's data. pub(crate) async fn write_efivar( vm: &phd_framework::TestVm, varname: &str, @@ -329,9 +345,10 @@ pub(crate) async fn write_efivar( let attr_read_bytes = run_long_command(vm, &attr_cmd).await?; let attrs = if attr_read_bytes.ends_with(": No such file or directory") { - // Default attributes if the variable does not exist yet. We expect it to be non-volatile - // because we are writing it, we expect it to be available to boot services (not strictly - // required, but for boot configuration we need it), and we expect it to be available at + // Default attributes if the variable does not exist yet. We expect it + // to be non-volatile because we are writing it, we expect it to be + // available to boot services (not strictly required, but for boot + // configuration we need it), and we expect it to be available at // runtime (e.g. where we are reading and writing it). // // so: @@ -349,7 +366,8 @@ pub(crate) async fn write_efivar( // ``` // printf "\xAA\xAA\xAA\xAA\xDD\xDD\xDD\xDD" > /sys/firmware/efi/efivars/... // ``` - // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided data. + // where AAAAAAAA are the attribute bytes and DDDDDDDD are caller-provided + // data. let escaped: String = new_value.into_iter().fold(String::new(), |mut out, b| { write!(out, "\\x{:02x}", b).expect("can append to String"); @@ -359,7 +377,8 @@ pub(crate) async fn write_efivar( let cmd = format!("printf \"{}\" > {}", escaped, efipath(varname)); let res = run_long_command(vm, &cmd).await?; - // If something went sideways and the write failed with something like `invalid argument`... + // If something went sideways and the write failed with something like + // `invalid argument`... if !res.is_empty() { bail!("writing efi produced unexpected output: {}", res); } @@ -367,14 +386,15 @@ pub(crate) async fn write_efivar( Ok(()) } -/// Learn the boot option numbers associated with various boot options that may or should -/// exist. +/// Learn the boot option numbers associated with various boot options that may +/// or should exist. /// -/// The fundamental wrinkle here is that we don't necessarily know what `Boot####` entries -/// exist, or which numbers they have, because NvVar is handled through persistence in guest -/// disks. This means a guest image may have some prior NvVar state with `Boot####` entries -/// that aren't removed, and cause entries reflecting the current system to have later numbers -/// than a fully blank initial set of variables. +/// The fundamental wrinkle here is that we don't necessarily know what +/// `Boot####` entries exist, or which numbers they have, because NvVar is +/// handled through persistence in guest disks. This means a guest image may +/// have some prior NvVar state with `Boot####` entries that aren't removed, and +/// cause entries reflecting the current system to have later numbers than a +/// fully blank initial set of variables. pub(crate) async fn discover_boot_option_numbers( vm: &phd_framework::TestVm, device_names: &[((u8, u8), &'static str)], @@ -455,11 +475,12 @@ pub(crate) fn find_option_in_boot_order( .map(|(i, _chunk)| i) } -/// Remove the boot option from `vm`'s EFI BootOrder variable. `boot_option_num` is assumed to -/// refer to a boot option named like `format!("Boot{boot_option_num:4X}-*")`. +/// Remove the boot option from `vm`'s EFI BootOrder variable. `boot_option_num` +/// is assumed to refer to a boot option named like +/// `format!("Boot{boot_option_num:4X}-*")`. /// -/// If the boot order was actually modified, returns the index that `boot_option_num` was -/// removed at. +/// If the boot order was actually modified, returns the index that +/// `boot_option_num` was removed at. pub(crate) async fn remove_boot_entry( vm: &phd_framework::TestVm, boot_option_num: u16, @@ -480,9 +501,10 @@ pub(crate) async fn remove_boot_entry( without_option.remove(option_idx * 2); without_option.remove(option_idx * 2); - // Technically it's fine if an option is present multiple times, but typically an option is - // present only once. This function intends to remove all copies of the specified option, - // so assert that we have done so in the new order. + // Technically it's fine if an option is present multiple times, but + // typically an option is present only once. This function intends to remove + // all copies of the specified option, so assert that we have done so in the + // new order. assert_eq!( find_option_in_boot_order(&without_option, boot_option_num), None From c9a522b9f6c9a0fb23fe3ed272ff7445f59d235c Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 02:17:20 +0000 Subject: [PATCH 32/42] dont want to wrap those lines, avoid the problem --- phd-tests/tests/src/boot_order/efi_utils.rs | 32 +++++++++++---------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index 8f979c3c5..8febd0160 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -152,13 +152,24 @@ impl DevicePath { let ty = bytes.read_u8()?; let subtype = bytes.read_u8()?; + macro_rules! check_size { + ($desc:expr, $size: expr, $expect:expr) => { + if $size != $expect { + bail!( + "{} size is wrong (was {:#04x}, not {:#04x}", + $desc, + $size, + $expect, + ); + } + } + } + match (ty, subtype) { (2, 1) => { // ACPI Device Path let size = bytes.read_u16::()?; - if size != 0xc { - bail!("ACPI Device Path size is wrong (was {:#04x}, not 0x000c)", size); - } + check_size!("ACPI Device Path", size, 0xc); let hid = bytes.read_u32::().unwrap(); let uid = bytes.read_u32::().unwrap(); Ok(DevicePath::Acpi { hid, uid }) @@ -166,9 +177,7 @@ impl DevicePath { (1, 1) => { // PCI device path let size = bytes.read_u16::()?; - if size != 0x6 { - bail!("PCI Device Path size is wrong (was {:#04x}, not 0x0006)", size); - } + check_size!("PCI Device Path", size, 0x6); let function = bytes.read_u8().unwrap(); let device = bytes.read_u8().unwrap(); Ok(DevicePath::Pci { device, function }) @@ -176,12 +185,7 @@ impl DevicePath { (4, 6) => { // "PIWG Firmware File" aka "Firmware File" in UEFI PI spec let size = bytes.read_u16::()?; - if size != 0x14 { - bail!( - "Firmware File size is wrong (was {:#04x}, not 0x0014)", - size - ); - } + check_size!("Firmware File", size, 0x14); let mut guid = [0u8; 16]; bytes.read_exact(&mut guid)?; Ok(DevicePath::FirmwareFile { guid }) @@ -189,9 +193,7 @@ impl DevicePath { (4, 7) => { // "PIWG Firmware Volume" aka "Firmware Volume" in UEFI PI spec let size = bytes.read_u16::()?; - if size != 0x14 { - bail!("Firmware Volume size is wrong (was {:#04x}, not 0x0014)", size); - } + check_size!("Firmware Volume", size, 0x14); let mut guid = [0u8; 16]; bytes.read_exact(&mut guid)?; Ok(DevicePath::FirmwareVolume { guid }) From bbeafcf6db21957eb7118313757c92c82e404ec5 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 02:17:34 +0000 Subject: [PATCH 33/42] rustfmt --- phd-tests/tests/src/boot_order/efi_utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phd-tests/tests/src/boot_order/efi_utils.rs b/phd-tests/tests/src/boot_order/efi_utils.rs index 8febd0160..a57f4e3c4 100644 --- a/phd-tests/tests/src/boot_order/efi_utils.rs +++ b/phd-tests/tests/src/boot_order/efi_utils.rs @@ -162,7 +162,7 @@ impl DevicePath { $expect, ); } - } + }; } match (ty, subtype) { From a636216ef05f304588fd04982d9f658af64cad45 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 02:21:32 +0000 Subject: [PATCH 34/42] rustfmt --- bin/propolis-server/src/lib/spec/builder.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index 85d2dbc8b..805b3386e 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -123,19 +123,15 @@ impl SpecBuilder { use crate::spec::StorageDevice; use crate::spec::{NvmeDisk, VirtioDisk}; - let is_disk = self - .spec - .disks - .values() - .find(|disk| match &disk.device_spec { + let is_disk = + self.spec.disks.values().any(|disk| match &disk.device_spec { StorageDevice::Virtio(VirtioDisk { backend_name, .. }) => { backend_name == item.name.as_str() } StorageDevice::Nvme(NvmeDisk { backend_name, .. }) => { backend_name == item.name.as_str() } - }) - .is_some(); + }); if !is_disk { return Err(SpecBuilderError::BootOptionMissing(item.name.clone())); From ab9743ea22f7e071d48f82c181bd1bcf107d44a7 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 02:33:37 +0000 Subject: [PATCH 35/42] remove unneeded (and not-generally-correct) ubuntu 24.04 adapter --- phd-tests/framework/src/guest_os/mod.rs | 4 --- .../framework/src/guest_os/ubuntu24_04.rs | 28 ------------------- 2 files changed, 32 deletions(-) delete mode 100644 phd-tests/framework/src/guest_os/ubuntu24_04.rs diff --git a/phd-tests/framework/src/guest_os/mod.rs b/phd-tests/framework/src/guest_os/mod.rs index 3d1f094cc..b993bd175 100644 --- a/phd-tests/framework/src/guest_os/mod.rs +++ b/phd-tests/framework/src/guest_os/mod.rs @@ -13,7 +13,6 @@ mod alpine; mod debian11_nocloud; mod shell_commands; mod ubuntu22_04; -mod ubuntu24_04; mod windows; mod windows_server_2016; mod windows_server_2019; @@ -89,7 +88,6 @@ pub enum GuestOsKind { Alpine, Debian11NoCloud, Ubuntu2204, - Ubuntu2404, WindowsServer2016, WindowsServer2019, WindowsServer2022, @@ -103,7 +101,6 @@ impl FromStr for GuestOsKind { "alpine" => Ok(Self::Alpine), "debian11nocloud" => Ok(Self::Debian11NoCloud), "ubuntu2204" => Ok(Self::Ubuntu2204), - "ubuntu2404" => Ok(Self::Ubuntu2404), "windowsserver2016" => Ok(Self::WindowsServer2016), "windowsserver2019" => Ok(Self::WindowsServer2019), "windowsserver2022" => Ok(Self::WindowsServer2022), @@ -122,7 +119,6 @@ pub(super) fn get_guest_os_adapter(kind: GuestOsKind) -> Box { Box::new(debian11_nocloud::Debian11NoCloud) } GuestOsKind::Ubuntu2204 => Box::new(ubuntu22_04::Ubuntu2204), - GuestOsKind::Ubuntu2404 => Box::new(ubuntu24_04::Ubuntu2404), GuestOsKind::WindowsServer2016 => { Box::new(windows_server_2016::WindowsServer2016) } diff --git a/phd-tests/framework/src/guest_os/ubuntu24_04.rs b/phd-tests/framework/src/guest_os/ubuntu24_04.rs deleted file mode 100644 index 40d182b4c..000000000 --- a/phd-tests/framework/src/guest_os/ubuntu24_04.rs +++ /dev/null @@ -1,28 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Guest OS adaptations for Ubuntu 24.04 images. These must be prepped with -//! a cloud-init disk that is configured with the appropriate user and password. - -use super::{CommandSequence, CommandSequenceEntry, GuestOs}; - -pub(super) struct Ubuntu2404; - -impl GuestOs for Ubuntu2404 { - fn get_login_sequence(&self) -> CommandSequence { - CommandSequence(vec![ - CommandSequenceEntry::wait_for("ubuntu login: "), - CommandSequenceEntry::write_str("root"), - CommandSequenceEntry::wait_for(self.get_shell_prompt()), - ]) - } - - fn get_shell_prompt(&self) -> &'static str { - "root@ubuntu:~#" - } - - fn read_only_fs(&self) -> bool { - false - } -} From 458b7402ff6cd569372e49ae21727e839bb7f483 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 02:43:13 +0000 Subject: [PATCH 36/42] cargo clippy --workspace... --- phd-tests/framework/src/test_vm/config.rs | 4 ++-- phd-tests/tests/src/boot_order.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 2bc9fa19e..d1dc5938c 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -220,11 +220,11 @@ impl<'dr> VmConfig<'dr> { let boot_disk = self.disks.iter().find(|d| d.name == "boot-disk") .or_else(|| if let Some(boot_order) = self.boot_order.as_ref() { - boot_order.get(0).and_then(|name| self.disks.iter().find(|d| &d.name == name)) + boot_order.first().and_then(|name| self.disks.iter().find(|d| &d.name == name)) } else { None }) - .or_else(|| self.disks.get(0)) + .or_else(|| self.disks.first()) .expect("VM config includes at least one disk (and maybe a boot order)?"); // XXX: assuming all bootable images are equivalent to the first, or at least the same diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index 7f46c7be3..f797bea4a 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -352,7 +352,7 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { // will not let us perform a useful test. write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; let reread = read_efivar(&vm, &bootvar(0xfff0)).await?; - if reread.len() == 0 { + if reread.is_empty() { warn!( "guest environment drops EFI variable writes! \ exiting test WITHOUT VALIDATING ANYTHING" From 6d0077e6c8d661d7f272454376ff861bc9fce34d Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 03:58:49 +0000 Subject: [PATCH 37/42] remove unnecessary warning about EFI partition this test does not rely on an EFI partition existing at all! --- phd-tests/tests/src/boot_order.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index f797bea4a..d906abd7d 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -454,15 +454,6 @@ async fn boot_order_source_priority(ctx: &Framework) { vm_no_bootorder.launch().await?; vm_no_bootorder.wait_to_boot().await?; - // If the guest doesn't have an EFI partition then there's no way for boot - // order preferences to be persisted. - let mountline = - vm_no_bootorder.run_shell_command("mount | grep /boot/efi").await?; - - if !mountline.contains(" on /boot/efi type vfat") { - warn!("guest doesn't have an EFI partition, cannot manage boot order"); - } - let boot_option_numbers = discover_boot_option_numbers( &vm_no_bootorder, &[ From cee2adcf5d28c69e514b8d1fbfeffc279f39cb24 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 03:59:04 +0000 Subject: [PATCH 38/42] fix migration tests for this non-API-breaking change --- phd-tests/tests/src/migrate.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/phd-tests/tests/src/migrate.rs b/phd-tests/tests/src/migrate.rs index ba4bb9abb..78438be79 100644 --- a/phd-tests/tests/src/migrate.rs +++ b/phd-tests/tests/src/migrate.rs @@ -90,7 +90,16 @@ mod from_base { let mut env = ctx.environment_builder(); env.propolis(artifacts::BASE_PROPOLIS_ARTIFACT); - let cfg = ctx.vm_config_builder(name); + let mut cfg = ctx.vm_config_builder(name); + // TODO: not strictly necessary, but as of #756 PHD began adding a + // `boot_settings` key by default to new instances. This is not + // understood by older Propolis, so migration tests would fail because + // the test changed, rather than a migration issue. + // + // At some point after landing #756, stop clearing the boot order, + // because a newer base Propolis will understand `boot_settings` just + // fine. + cfg.clear_boot_order(); ctx.spawn_vm(&cfg, Some(&env)).await } } From 93e33e46d26b6c074ab1c6ac0f58ed7ce751de09 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 25 Sep 2024 11:35:07 -0700 Subject: [PATCH 39/42] Update phd-tests/framework/src/test_vm/config.rs Co-authored-by: Greg Colombo --- phd-tests/framework/src/test_vm/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index d1dc5938c..ca99820c4 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -211,7 +211,7 @@ impl<'dr> VmConfig<'dr> { // // If there are multiple bootable disks in the boot order, we'll assume they're all // the same guest OS kind. So look for `boot-disk` - if there isn't a disk named - // `boot-disk` then fall back to hoping the first disk in the boot orrder is a bootable + // `boot-disk` then fall back to hoping the first disk in the boot order is a bootable // disk, and if *that* isn't a bootable disk, maybe the first disk is. // // TODO: theoretically we might want to accept configuration of a specific guest OS adapter From ba459299fac1e58cbbc88c5b1e589f80d65fbd9a Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 27 Sep 2024 12:42:39 +0000 Subject: [PATCH 40/42] make PHD disk name/backend name derivation match propolis-server API propolis-server was correct with respect to the scheme PHD uses to name disks and their backends (user-provided name is the name of the backend, derived name is the name of the disk), but disk requests from `parse_disk_from_request` use the user-provided name as the device name, with the derived name as the backend name. this means that propolis-server is wrong when used with Nexus, and real boot disk names never match against any of the `-backend` backends they were being compared against. this probably makes the whole thing work though! yay! --- bin/propolis-server/src/lib/initializer.rs | 18 ++---------------- bin/propolis-server/src/lib/spec/builder.rs | 19 +++---------------- phd-tests/framework/src/test_vm/config.rs | 10 +++++++--- 3 files changed, 12 insertions(+), 35 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 3db41f3c5..2b6107b18 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1036,22 +1036,8 @@ impl<'a> MachineInitializer<'a> { for boot_entry in boot_names.iter() { // Theoretically we could support booting from network devices by // matching them here and adding their PCI paths, but exactly what - // would happen is ill-understood and device names are not plumbed - // in a way that supports it. So, only check disks here. - if let Some(spec) = self - .spec - .disks - .iter() - .find(|(_name, spec)| match &spec.device_spec { - StorageDevice::Nvme(disk) => { - disk.backend_name.as_str() == boot_entry.name.as_str() - } - StorageDevice::Virtio(disk) => { - disk.backend_name.as_str() == boot_entry.name.as_str() - } - }) - .map(|(_name, spec)| spec) - { + // would happen is ill-understood. So, only check disks here. + if let Some(spec) = self.spec.disks.get(boot_entry.name.as_str()) { match &spec.device_spec { StorageDevice::Virtio(disk) => { let bdf = parse_bdf(&disk.pci_path)?; diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index 805b3386e..3357d8b43 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -118,25 +118,12 @@ impl SpecBuilder { &mut self, item: &BootOrderEntry, ) -> Result<(), SpecBuilderError> { - let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); - - use crate::spec::StorageDevice; - use crate::spec::{NvmeDisk, VirtioDisk}; - - let is_disk = - self.spec.disks.values().any(|disk| match &disk.device_spec { - StorageDevice::Virtio(VirtioDisk { backend_name, .. }) => { - backend_name == item.name.as_str() - } - StorageDevice::Nvme(NvmeDisk { backend_name, .. }) => { - backend_name == item.name.as_str() - } - }); - - if !is_disk { + if !self.spec.disks.contains_key(item.name.as_str()) { return Err(SpecBuilderError::BootOptionMissing(item.name.clone())); } + let boot_order = self.spec.boot_order.get_or_insert(Vec::new()); + boot_order .push(crate::spec::BootOrderEntry { name: item.name.clone() }); diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index ca99820c4..2a4b42921 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -257,10 +257,14 @@ impl<'dr> VmConfig<'dr> { // in the correct order: boot disk first, then in the data disks' // iteration order. let all_disks = self.disks.iter().zip(disk_handles.iter()); - for (idx, (req, hdl)) in all_disks.enumerate() { - let device_name = format!("disk-device{}", idx); + for (req, hdl) in all_disks { let pci_path = PciPath::new(0, req.pci_device_num, 0).unwrap(); - let (backend_name, backend_spec) = hdl.backend_spec(); + let (device_name, backend_spec) = hdl.backend_spec(); + // propolis-server uses a disk's name from the API as its device + // name. It then derives a backend name from the disk name. Match + // that name derivation here so PHD tests match the API as used by + // Nexus. + let backend_name = format!("{}-backend", device_name); let device_spec = match req.interface { DiskInterface::Virtio => { StorageDeviceV0::VirtioDisk(VirtioDisk { From 22b5a08ae09a1fb9fd8e7e9d76f9475a71f5b6cc Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 27 Sep 2024 13:49:28 +0000 Subject: [PATCH 41/42] review feedback: * a few typos * a few 80col violations * actually enforce that boot devices are on PCI bus 0 * emphasize that propolis-server should never see a Spec with invalid boot disk * vestigial line * phd_skip!() instead of warn!+return Ok --- bin/propolis-server/src/lib/initializer.rs | 26 ++++++++--- lib/propolis-client/src/instance_spec.rs | 14 +++--- phd-tests/framework/src/test_vm/config.rs | 51 +++++++++++++--------- phd-tests/tests/src/boot_order.rs | 6 +-- 4 files changed, 58 insertions(+), 39 deletions(-) diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index 2b6107b18..c0fd10b49 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -1041,26 +1041,38 @@ impl<'a> MachineInitializer<'a> { match &spec.device_spec { StorageDevice::Virtio(disk) => { let bdf = parse_bdf(&disk.pci_path)?; - // TODO: check that bus is 0. only support boot devices - // directly attached to the root bus for now. + if bdf.bus.get() != 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "Boot device currently must be on PCI bus 0", + )); + } + order.add_disk(bdf.location); } StorageDevice::Nvme(disk) => { let bdf = parse_bdf(&disk.pci_path)?; - // TODO: check that bus is 0. only support boot devices - // directly attached to the root bus for now. - // + if bdf.bus.get() != 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "Boot device currently must be on PCI bus 0", + )); + } + // TODO: separately, propolis-standalone passes an eui64 // of 0, so do that here too. is that.. ok? order.add_nvme(bdf.location, 0); } }; } else { + // This should be unreachable - we check that the boot disk is + // valid when constructing the spec we're initializing from. let message = format!( - "Instance spec included boot entry which does not refer to an existing disk: `{}`", + "Instance spec included boot entry which does not refer \ + to an existing disk: `{}`", boot_entry.name.as_str(), ); - return Err(Error::new(ErrorKind::Other, message)); + return Err(Error::new(ErrorKind::InvalidInput, message)); } } diff --git a/lib/propolis-client/src/instance_spec.rs b/lib/propolis-client/src/instance_spec.rs index cb8bbc449..8c75bee98 100644 --- a/lib/propolis-client/src/instance_spec.rs +++ b/lib/propolis-client/src/instance_spec.rs @@ -186,14 +186,16 @@ impl SpecBuilderV0 { /// Sets a boot order. Names here refer to devices included in this spec. /// - /// Permissible to not this if the implicit boot order is desired, but the implicit boot order - /// may be unstable across device addition and removal. + /// Permissible to not set this if the implicit boot order is desired, but + /// the implicit boot order may be unstable across device addition and + /// removal. /// - /// If any devices named in this order are not actually present in the constructed spec, - /// Propolis will return an error when the spec is provided. + /// If any devices named in this order are not actually present in the + /// constructed spec, Propolis will return an error when the spec is + /// provided. /// - /// XXX: this should certainly return `&mut Self` - all the builders here should. check if any - /// of these are chained..? + /// XXX: this should certainly return `&mut Self` - all the builders here + /// should. check if any of these are chained..? pub fn set_boot_order( &mut self, boot_order: Vec, diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 2a4b42921..14d92fb32 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -132,10 +132,11 @@ impl<'dr> VmConfig<'dr> { self } - /// Add a new disk to the VM config, and add it to the front of the VM's boot order. + /// Add a new disk to the VM config, and add it to the front of the VM's + /// boot order. /// - /// The added disk will have the name `boot-disk`, and replace the previous existing - /// `boot-disk`. + /// The added disk will have the name `boot-disk`, and replace the previous + /// existing `boot-disk`. pub fn boot_disk( &mut self, artifact: &'dr str, @@ -207,28 +208,37 @@ impl<'dr> VmConfig<'dr> { }) .context("serializing Propolis server config")?; - // The first disk in the boot list might not be the disk a test *actually* expects to boot. + // The first disk in the boot list might not be the disk a test + // *actually* expects to boot. // - // If there are multiple bootable disks in the boot order, we'll assume they're all - // the same guest OS kind. So look for `boot-disk` - if there isn't a disk named - // `boot-disk` then fall back to hoping the first disk in the boot order is a bootable - // disk, and if *that* isn't a bootable disk, maybe the first disk is. + // If there are multiple bootable disks in the boot order, we'll assume + // they're all the same guest OS kind. So look for `boot-disk` - if + // there isn't a disk named `boot-disk` then fall back to hoping the + // first disk in the boot order is a bootable disk, and if *that* isn't + // a bootable disk, maybe the first disk is. // - // TODO: theoretically we might want to accept configuration of a specific guest OS adapter - // and avoid the guessing games. So far the above supports existing tests and makes them - // "Just Work", but a more complicated test may want more control here. - let boot_disk = - self.disks.iter().find(|d| d.name == "boot-disk") - .or_else(|| if let Some(boot_order) = self.boot_order.as_ref() { - boot_order.first().and_then(|name| self.disks.iter().find(|d| &d.name == name)) + // TODO: theoretically we might want to accept configuration of a + // specific guest OS adapter and avoid the guessing games. So far the + // above supports existing tests and makes them "Just Work", but a more + // complicated test may want more control here. + let boot_disk = self + .disks + .iter() + .find(|d| d.name == "boot-disk") + .or_else(|| { + if let Some(boot_order) = self.boot_order.as_ref() { + boot_order.first().and_then(|name| { + self.disks.iter().find(|d| &d.name == name) + }) } else { None - }) - .or_else(|| self.disks.first()) - .expect("VM config includes at least one disk (and maybe a boot order)?"); + } + }) + .or_else(|| self.disks.first()) + .expect("VM config includes at least one disk"); - // XXX: assuming all bootable images are equivalent to the first, or at least the same - // guest OS kind. + // XXX: assuming all bootable images are equivalent to the first, or at + // least the same guest OS kind. let DiskSource::Artifact(boot_artifact) = boot_disk.source else { unreachable!("boot disks always use artifacts as sources"); }; @@ -242,7 +252,6 @@ impl<'dr> VmConfig<'dr> { let mut disk_handles = Vec::new(); for disk in self.disks.iter() { disk_handles.push( - // format!("data-disk-{}", idx) make_disk(disk.name.to_owned(), framework, disk) .await .context("creating disk")?, diff --git a/phd-tests/tests/src/boot_order.rs b/phd-tests/tests/src/boot_order.rs index d906abd7d..569a72458 100644 --- a/phd-tests/tests/src/boot_order.rs +++ b/phd-tests/tests/src/boot_order.rs @@ -353,11 +353,7 @@ async fn guest_can_adjust_boot_order(ctx: &Framework) { write_efivar(&vm, &bootvar(0xfff0), &boot_option_bytes).await?; let reread = read_efivar(&vm, &bootvar(0xfff0)).await?; if reread.is_empty() { - warn!( - "guest environment drops EFI variable writes! \ - exiting test WITHOUT VALIDATING ANYTHING" - ); - return Ok(()); + phd_skip!("Guest environment drops EFI variable writes"); } else { assert_eq!( boot_option_bytes, From 3a3e753ca28a383f48eea33673f339f835a98546 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 27 Sep 2024 23:33:07 +0000 Subject: [PATCH 42/42] use newtypes to delineate DeviceName/BackendName in PHD these newtypes are useful for other kinds of devices, and in propolis-api-types generally, but threading that through is more time than i can spend on this right now --- phd-tests/framework/src/disk/crucible.rs | 31 ++++++++------ phd-tests/framework/src/disk/file.rs | 27 ++++++------ phd-tests/framework/src/disk/in_memory.rs | 24 ++++++----- phd-tests/framework/src/disk/mod.rs | 52 +++++++++++++++++++++-- phd-tests/framework/src/test_vm/config.rs | 29 ++++++------- phd-tests/framework/src/test_vm/spec.rs | 4 +- 6 files changed, 110 insertions(+), 57 deletions(-) diff --git a/phd-tests/framework/src/disk/crucible.rs b/phd-tests/framework/src/disk/crucible.rs index 04d71b595..c8b543450 100644 --- a/phd-tests/framework/src/disk/crucible.rs +++ b/phd-tests/framework/src/disk/crucible.rs @@ -21,7 +21,9 @@ use tracing::{error, info}; use uuid::Uuid; use super::BlockSize; -use crate::{guest_os::GuestOsKind, server_log_mode::ServerLogMode}; +use crate::{ + disk::DeviceName, guest_os::GuestOsKind, server_log_mode::ServerLogMode, +}; /// An RAII wrapper around a directory containing Crucible data files. Deletes /// the directory and its contents when dropped. @@ -60,8 +62,8 @@ impl Drop for Downstairs { /// An RAII wrapper around a Crucible disk. #[derive(Debug)] pub struct CrucibleDisk { - /// The name of the backend to use in instance specs that include this disk. - backend_name: String, + /// The name to use in instance specs that include this disk. + device_name: DeviceName, /// The UUID to insert into this disk's `VolumeConstructionRequest`s. id: Uuid, @@ -97,7 +99,7 @@ impl CrucibleDisk { /// `data_dir`. #[allow(clippy::too_many_arguments)] pub(crate) fn new( - backend_name: String, + device_name: DeviceName, min_disk_size_gib: u64, block_size: BlockSize, downstairs_binary_path: &impl AsRef, @@ -251,7 +253,7 @@ impl CrucibleDisk { } Ok(Self { - backend_name, + device_name, id: disk_uuid, block_size, blocks_per_extent, @@ -280,7 +282,11 @@ impl CrucibleDisk { } impl super::DiskConfig for CrucibleDisk { - fn backend_spec(&self) -> (String, StorageBackendV0) { + fn device_name(&self) -> &DeviceName { + &self.device_name + } + + fn backend_spec(&self) -> StorageBackendV0 { let gen = self.generation.load(Ordering::Relaxed); let downstairs_addrs = self .downstairs_instances @@ -318,14 +324,11 @@ impl super::DiskConfig for CrucibleDisk { }), }; - ( - self.backend_name.clone(), - StorageBackendV0::Crucible(CrucibleStorageBackend { - request_json: serde_json::to_string(&vcr) - .expect("VolumeConstructionRequest should serialize"), - readonly: false, - }), - ) + StorageBackendV0::Crucible(CrucibleStorageBackend { + request_json: serde_json::to_string(&vcr) + .expect("VolumeConstructionRequest should serialize"), + readonly: false, + }) } fn guest_os(&self) -> Option { diff --git a/phd-tests/framework/src/disk/file.rs b/phd-tests/framework/src/disk/file.rs index 76eb409b6..3fbf05308 100644 --- a/phd-tests/framework/src/disk/file.rs +++ b/phd-tests/framework/src/disk/file.rs @@ -9,7 +9,9 @@ use propolis_client::types::{FileStorageBackend, StorageBackendV0}; use tracing::{debug, error, warn}; use uuid::Uuid; -use crate::{guest_os::GuestOsKind, zfs::ClonedFile as ZfsClonedFile}; +use crate::{ + disk::DeviceName, guest_os::GuestOsKind, zfs::ClonedFile as ZfsClonedFile, +}; /// Describes the method used to create the backing file for a file-backed disk. #[derive(Debug)] @@ -80,7 +82,7 @@ impl Drop for BackingFile { #[derive(Debug)] pub struct FileBackedDisk { /// The name to use for instance spec backends that refer to this disk. - backend_name: String, + device_name: DeviceName, /// The backing file for this disk. file: BackingFile, @@ -94,7 +96,7 @@ impl FileBackedDisk { /// Creates a new file-backed disk whose initial contents are copied from /// the specified artifact on the host file system. pub(crate) fn new_from_artifact( - backend_name: String, + device_name: DeviceName, artifact_path: &impl AsRef, data_dir: &impl AsRef, guest_os: Option, @@ -116,19 +118,20 @@ impl FileBackedDisk { permissions.set_readonly(false); disk_file.set_permissions(permissions)?; - Ok(Self { backend_name, file: artifact, guest_os }) + Ok(Self { device_name, file: artifact, guest_os }) } } impl super::DiskConfig for FileBackedDisk { - fn backend_spec(&self) -> (String, StorageBackendV0) { - ( - self.backend_name.clone(), - StorageBackendV0::File(FileStorageBackend { - path: self.file.path().to_string(), - readonly: false, - }), - ) + fn device_name(&self) -> &DeviceName { + &self.device_name + } + + fn backend_spec(&self) -> StorageBackendV0 { + StorageBackendV0::File(FileStorageBackend { + path: self.file.path().to_string(), + readonly: false, + }) } fn guest_os(&self) -> Option { diff --git a/phd-tests/framework/src/disk/in_memory.rs b/phd-tests/framework/src/disk/in_memory.rs index 51a57f680..5300c4f68 100644 --- a/phd-tests/framework/src/disk/in_memory.rs +++ b/phd-tests/framework/src/disk/in_memory.rs @@ -7,11 +7,12 @@ use propolis_client::types::{BlobStorageBackend, StorageBackendV0}; use super::DiskConfig; +use crate::disk::DeviceName; /// A disk with an in-memory backend. #[derive(Debug)] pub struct InMemoryDisk { - backend_name: String, + device_name: DeviceName, contents: Vec, readonly: bool, } @@ -20,28 +21,29 @@ impl InMemoryDisk { /// Creates an in-memory backend that will present the supplied `contents` /// to the guest. pub fn new( - backend_name: String, + device_name: DeviceName, contents: Vec, readonly: bool, ) -> Self { - Self { backend_name, contents, readonly } + Self { device_name, contents, readonly } } } impl DiskConfig for InMemoryDisk { - fn backend_spec(&self) -> (String, StorageBackendV0) { + fn device_name(&self) -> &DeviceName { + &self.device_name + } + + fn backend_spec(&self) -> StorageBackendV0 { let base64 = base64::Engine::encode( &base64::engine::general_purpose::STANDARD, self.contents.as_slice(), ); - ( - self.backend_name.clone(), - StorageBackendV0::Blob(BlobStorageBackend { - base64, - readonly: self.readonly, - }), - ) + StorageBackendV0::Blob(BlobStorageBackend { + base64, + readonly: self.readonly, + }) } fn guest_os(&self) -> Option { diff --git a/phd-tests/framework/src/disk/mod.rs b/phd-tests/framework/src/disk/mod.rs index e3803e3da..35ec84af4 100644 --- a/phd-tests/framework/src/disk/mod.rs +++ b/phd-tests/framework/src/disk/mod.rs @@ -67,10 +67,54 @@ impl BlockSize { } } +/// The name for the device implementing a disk. This is the name provided for a +/// disk in constructing a VM spec for PHD tests. The disk by this name likely +/// also has a [`BackendName`] derived from this device name. +/// +/// TODO: This exists largely to ensure that PHD matches the same spec +/// construction conventions as `propolis-server` when handling API requests: it +/// is another piece of glue that could reasonably be deleted if/when PHD and +/// sled-agent use the same code to build InstanceEnsureRequest. Until then, +/// carefully match the relationship between names with these newtypes. +/// +/// Alternatively, `DeviceName` and `BackendName` could be pulled into +/// `propolis-api-types`. +#[derive(Clone, Debug)] +pub struct DeviceName(String); + +impl DeviceName { + pub fn new(name: String) -> Self { + DeviceName(name) + } + + pub fn into_backend_name(self) -> BackendName { + // This must match `api_request.rs`' `parse_disk_from_request`. + BackendName(format!("{}-backend", self.0)) + } + + pub fn into_string(self) -> String { + self.0 + } +} + +/// The name for a backend implementing storage for a disk. This is derived +/// exclusively from a corresponding [`DeviceName`]. +#[derive(Clone, Debug)] +pub struct BackendName(String); + +impl BackendName { + pub fn into_string(self) -> String { + self.0 + } +} + /// A trait for functions exposed by all disk backends (files, Crucible, etc.). pub trait DiskConfig: std::fmt::Debug + Send + Sync { + /// Yields the device name for this disk. + fn device_name(&self) -> &DeviceName; + /// Yields the backend spec for this disk's storage backend. - fn backend_spec(&self) -> (String, StorageBackendV0); + fn backend_spec(&self) -> StorageBackendV0; /// Yields the guest OS kind of the guest image the disk was created from, /// or `None` if the disk was not created from a guest image. @@ -182,7 +226,7 @@ impl DiskFactory { /// by `source`. pub(crate) async fn create_file_backed_disk<'d>( &self, - name: String, + name: DeviceName, source: &DiskSource<'d>, ) -> Result, DiskError> { let artifact_name = match source { @@ -226,7 +270,7 @@ impl DiskFactory { /// - block_size: The disk's block size. pub(crate) async fn create_crucible_disk<'d>( &self, - name: String, + name: DeviceName, source: &DiskSource<'d>, mut min_disk_size_gib: u64, block_size: BlockSize, @@ -278,7 +322,7 @@ impl DiskFactory { pub(crate) async fn create_in_memory_disk<'d>( &self, - name: String, + name: DeviceName, source: &DiskSource<'d>, readonly: bool, ) -> Result, DiskError> { diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 14d92fb32..ddfdb42f8 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -16,7 +16,7 @@ use propolis_client::{ use uuid::Uuid; use crate::{ - disk::{DiskConfig, DiskSource}, + disk::{DeviceName, DiskConfig, DiskSource}, test_vm::spec::VmSpec, Framework, }; @@ -268,30 +268,27 @@ impl<'dr> VmConfig<'dr> { let all_disks = self.disks.iter().zip(disk_handles.iter()); for (req, hdl) in all_disks { let pci_path = PciPath::new(0, req.pci_device_num, 0).unwrap(); - let (device_name, backend_spec) = hdl.backend_spec(); - // propolis-server uses a disk's name from the API as its device - // name. It then derives a backend name from the disk name. Match - // that name derivation here so PHD tests match the API as used by - // Nexus. - let backend_name = format!("{}-backend", device_name); + let backend_spec = hdl.backend_spec(); + let device_name = hdl.device_name().clone(); + let backend_name = device_name.clone().into_backend_name(); let device_spec = match req.interface { DiskInterface::Virtio => { StorageDeviceV0::VirtioDisk(VirtioDisk { - backend_name: backend_name.clone(), + backend_name: backend_name.clone().into_string(), pci_path, }) } DiskInterface::Nvme => StorageDeviceV0::NvmeDisk(NvmeDisk { - backend_name: backend_name.clone(), + backend_name: backend_name.clone().into_string(), pci_path, }), }; spec_builder .add_storage_device( - device_name, + device_name.into_string(), device_spec, - backend_name, + backend_name.into_string(), backend_spec, ) .context("adding storage device to spec")?; @@ -334,14 +331,16 @@ impl<'dr> VmConfig<'dr> { } async fn make_disk<'req>( - name: String, + device_name: String, framework: &Framework, req: &DiskRequest<'req>, ) -> anyhow::Result> { + let device_name = DeviceName::new(device_name); + Ok(match req.backend { DiskBackend::File => framework .disk_factory - .create_file_backed_disk(name, &req.source) + .create_file_backed_disk(device_name, &req.source) .await .with_context(|| { format!("creating new file-backed disk from {:?}", req.source,) @@ -349,7 +348,7 @@ async fn make_disk<'req>( DiskBackend::Crucible { min_disk_size_gib, block_size } => framework .disk_factory .create_crucible_disk( - name, + device_name, &req.source, min_disk_size_gib, block_size, @@ -364,7 +363,7 @@ async fn make_disk<'req>( as Arc, DiskBackend::InMemory { readonly } => framework .disk_factory - .create_in_memory_disk(name, &req.source, readonly) + .create_in_memory_disk(device_name, &req.source, readonly) .await .with_context(|| { format!("creating new in-memory disk from {:?}", req.source) diff --git a/phd-tests/framework/src/test_vm/spec.rs b/phd-tests/framework/src/test_vm/spec.rs index 841a05b7d..0d25dc271 100644 --- a/phd-tests/framework/src/test_vm/spec.rs +++ b/phd-tests/framework/src/test_vm/spec.rs @@ -46,7 +46,9 @@ impl VmSpec { continue; }; - let (backend_name, backend_spec) = disk.backend_spec(); + let backend_spec = disk.backend_spec(); + let backend_name = + disk.device_name().clone().into_backend_name().into_string(); match self .instance_spec .backends