Skip to content

Commit 11371b0

Browse files
authored
teach propolis-server to understand configurable boot order (#756)
while we accept a list of boot options here, note that Nexus intentionally has a more reductive view, maintaining only a single boot_disk. this is heavily based on what i've learned writing tests for how OVMF+guests behave with different mixes of boot device orders and sources of boot configuration. this conversation on RFD 470: oxidecomputer/rfd#751 (comment) gets at the crux of it - we don't have anything like a platform NVRAM device for guests to persist EFI variables in, so boot configuration is persisted in the EFI system partition (so, associated with a disk, not an instance). because we're not going to go modify the EFI system partition in user disks to effect boot order preferences, we're using the QEMU-style fw_cfg mechanism to indicate boot order. this, in turn, means that guest settings are blown away and replaced with what OVMF determines at boot time. taken together, this isn't an ideal spot to be in if we were to support more complex boot behavior logic, and it's probably not how we want to work multi-device orderings into propolis-server. it also means that guest-managed boot ordering just doesn't reliably persist if the instance is configured with a specific boot device! in the future, with managed nonvolatile storage for guest firmware to persist settings in, we'll be in a somewhat better position to extend boot logic, and so this PR should leave us able to do so without much API hassle. that will probably be more interesting on the control plane side, which is why i'm permissive on the Propolis side, but restrictive on the Nexus side. along the way, this change brings PHD device/backend names in line with propolis-server.
1 parent fae5334 commit 11371b0

File tree

27 files changed

+1555
-99
lines changed

27 files changed

+1555
-99
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bin/propolis-cli/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ async fn new_instance(
266266
// TODO: Allow specifying NICs
267267
nics: vec![],
268268
disks,
269+
boot_settings: None,
269270
migrate: None,
270271
cloud_init_bytes,
271272
};
@@ -517,6 +518,9 @@ async fn migrate_instance(
517518
// TODO: Handle migrating NICs
518519
nics: vec![],
519520
disks,
521+
// TODO: Handle retaining boot settings? Or are extant boot settings
522+
// forwarded along outside InstanceEnsure anyway?
523+
boot_settings: None,
520524
migrate: Some(InstanceMigrateInitiateRequest {
521525
migration_id: Uuid::new_v4(),
522526
src_addr: src_addr.to_string(),

bin/propolis-server/src/lib/initializer.rs

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use std::sync::Arc;
1111
use std::time::{SystemTime, UNIX_EPOCH};
1212

1313
use crate::serial::Serial;
14-
use crate::spec::{self, Spec, StorageBackend};
14+
use crate::spec::{self, Spec, StorageBackend, StorageDevice};
1515
use crate::stats::{
1616
track_network_interface_kstats, track_vcpu_kstats, VirtualDiskProducer,
1717
VirtualMachine,
@@ -34,7 +34,11 @@ use propolis::hw::ibmpc;
3434
use propolis::hw::pci;
3535
use propolis::hw::ps2::ctrl::PS2Ctrl;
3636
use propolis::hw::qemu::pvpanic::QemuPvpanic;
37-
use propolis::hw::qemu::{debug::QemuDebugPort, fwcfg, ramfb};
37+
use propolis::hw::qemu::{
38+
debug::QemuDebugPort,
39+
fwcfg::{self, Entry},
40+
ramfb,
41+
};
3842
use propolis::hw::uart::LpcUart;
3943
use propolis::hw::{nvme, virtio};
4044
use propolis::intr_pins;
@@ -1001,6 +1005,80 @@ impl<'a> MachineInitializer<'a> {
10011005
smb_tables.commit()
10021006
}
10031007

1008+
/// Builds the `fw_cfg` boot order entry from the boot order in the instance
/// spec.
///
/// Returns `Ok(None)` when the spec has no boot order. Otherwise each boot
/// entry is looked up by name among the spec's disks and added to the order
/// by PCI location: virtio disks via `add_disk`, NVMe disks via `add_nvme`
/// with an EUI-64 of 0.
///
/// # Errors
///
/// Returns an `ErrorKind::InvalidInput` error if a boot entry does not name
/// a disk in the spec, if a disk's PCI path cannot be converted to a BDF, or
/// if the disk is not on PCI bus 0.
fn generate_bootorder(&self) -> Result<Option<Entry>, Error> {
1009+
info!(
1010+
self.log,
1011+
"Generating bootorder with order: {:?}",
1012+
self.spec.boot_order.as_ref()
1013+
);
1014+
// No boot order in the spec means there is no entry to generate.
let Some(boot_names) = self.spec.boot_order.as_ref() else {
1015+
return Ok(None);
1016+
};
1017+
1018+
let mut order = fwcfg::formats::BootOrder::new();
1019+
1020+
// Converts a spec PCI path into a `pci::Bdf`, mapping a conversion failure
// to an `InvalidInput` error that names the offending path.
let parse_bdf =
1021+
|pci_path: &propolis_api_types::instance_spec::PciPath| {
1022+
let bdf: Result<pci::Bdf, Error> =
1023+
pci_path.to_owned().try_into().map_err(|e| {
1024+
Error::new(
1025+
ErrorKind::InvalidInput,
1026+
format!(
1027+
"Couldn't get PCI BDF for {}: {}",
1028+
pci_path, e
1029+
),
1030+
)
1031+
});
1032+
1033+
bdf
1034+
};
1035+
1036+
for boot_entry in boot_names.iter() {
1037+
// Theoretically we could support booting from network devices by
1038+
// matching them here and adding their PCI paths, but exactly what
1039+
// would happen is ill-understood. So, only check disks here.
1040+
if let Some(spec) = self.spec.disks.get(boot_entry.name.as_str()) {
1041+
match &spec.device_spec {
1042+
StorageDevice::Virtio(disk) => {
1043+
let bdf = parse_bdf(&disk.pci_path)?;
1044+
if bdf.bus.get() != 0 {
1045+
return Err(Error::new(
1046+
ErrorKind::InvalidInput,
1047+
"Boot device currently must be on PCI bus 0",
1048+
));
1049+
}
1050+
1051+
order.add_disk(bdf.location);
1052+
}
1053+
StorageDevice::Nvme(disk) => {
1054+
let bdf = parse_bdf(&disk.pci_path)?;
1055+
if bdf.bus.get() != 0 {
1056+
return Err(Error::new(
1057+
ErrorKind::InvalidInput,
1058+
"Boot device currently must be on PCI bus 0",
1059+
));
1060+
}
1061+
1062+
// TODO: separately, propolis-standalone passes an eui64
1063+
// of 0, so do that here too. is that.. ok?
1064+
order.add_nvme(bdf.location, 0);
1065+
}
1066+
};
1067+
} else {
1068+
// This should be unreachable - we check that the boot disk is
1069+
// valid when constructing the spec we're initializing from.
1070+
let message = format!(
1071+
"Instance spec included boot entry which does not refer \
1072+
to an existing disk: `{}`",
1073+
boot_entry.name.as_str(),
1074+
);
1075+
return Err(Error::new(ErrorKind::InvalidInput, message));
1076+
}
1077+
}
1078+
1079+
Ok(Some(order.finish()))
1080+
}
1081+
10041082
/// Initialize qemu `fw_cfg` device, and populate it with data including CPU
10051083
/// count, SMBIOS tables, and attached RAM-FB device.
10061084
///
@@ -1032,6 +1110,10 @@ impl<'a> MachineInitializer<'a> {
10321110
)
10331111
.unwrap();
10341112

1113+
if let Some(boot_order) = self.generate_bootorder()? {
1114+
fwcfg.insert_named("bootorder", boot_order).unwrap();
1115+
}
1116+
10351117
let ramfb = ramfb::RamFb::create(
10361118
self.log.new(slog::o!("component" => "ramfb")),
10371119
);

bin/propolis-server/src/lib/server.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ fn instance_spec_from_request(
129129
spec_builder.add_disk_from_request(disk)?;
130130
}
131131

132+
if let Some(boot_settings) = request.boot_settings.as_ref() {
133+
for item in boot_settings.order.iter() {
134+
spec_builder.add_boot_option(item)?;
135+
}
136+
}
137+
132138
if let Some(base64) = &request.cloud_init_bytes {
133139
spec_builder.add_cloud_init_from_request(base64.clone())?;
134140
}

bin/propolis-server/src/lib/spec/api_spec_v0.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ impl TryFrom<InstanceSpecV0> for Spec {
247247
return Err(ApiSpecError::BackendNotUsed(backend.to_owned()));
248248
}
249249

250+
if let Some(boot_settings) = value.devices.boot_settings.as_ref() {
251+
for item in boot_settings.order.iter() {
252+
builder.add_boot_option(item)?;
253+
}
254+
}
255+
250256
for (name, serial_port) in value.devices.serial_ports {
251257
builder.add_serial_port(name, serial_port.num)?;
252258
}

bin/propolis-server/src/lib/spec/builder.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use propolis_api_types::{
1414
},
1515
PciPath,
1616
},
17-
DiskRequest, InstanceProperties, NetworkInterfaceRequest,
17+
BootOrderEntry, DiskRequest, InstanceProperties, NetworkInterfaceRequest,
1818
};
1919
use thiserror::Error;
2020

@@ -57,6 +57,9 @@ pub(crate) enum SpecBuilderError {
5757

5858
#[error("pvpanic device already specified")]
5959
PvpanicInUse,
60+
61+
#[error("Boot option {0} is not an attached device")]
62+
BootOptionMissing(String),
6063
}
6164

6265
#[derive(Debug, Default)]
@@ -110,6 +113,23 @@ impl SpecBuilder {
110113
Ok(())
111114
}
112115

116+
/// Add a boot option to the boot option list of the spec under construction.
///
/// Options are appended in call order; the list is created on the first
/// call.
///
/// # Errors
///
/// Returns [`SpecBuilderError::BootOptionMissing`] if `item.name` is not the
/// name of a disk already present in the spec, so disks must be added before
/// the boot options that refer to them.
117+
pub fn add_boot_option(
118+
&mut self,
119+
item: &BootOrderEntry,
120+
) -> Result<(), SpecBuilderError> {
121+
if !self.spec.disks.contains_key(item.name.as_str()) {
122+
return Err(SpecBuilderError::BootOptionMissing(item.name.clone()));
123+
}
124+
125+
let boot_order = self.spec.boot_order.get_or_insert(Vec::new());
126+
127+
boot_order
128+
.push(crate::spec::BootOrderEntry { name: item.name.clone() });
129+
130+
Ok(())
131+
}
132+
113133
/// Converts an HTTP API request to add a cloud-init disk to an instance
114134
/// into device/backend entries in the spec under construction.
115135
pub fn add_cloud_init_from_request(

bin/propolis-server/src/lib/spec/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ pub(crate) struct Spec {
5757
pub board: Board,
5858
pub disks: HashMap<String, Disk>,
5959
pub nics: HashMap<String, Nic>,
60+
pub boot_order: Option<Vec<BootOrderEntry>>,
6061

6162
pub serial: HashMap<String, SerialPort>,
6263

@@ -67,6 +68,11 @@ pub(crate) struct Spec {
6768
pub softnpu: SoftNpu,
6869
}
6970

71+
/// A single entry in a [`Spec`]'s boot order.
#[derive(Clone, Debug, Default)]
72+
pub(crate) struct BootOrderEntry {
73+
/// Name of the boot device; the spec builder only accepts names that are
/// keys in [`Spec::disks`].
pub name: String,
74+
}
75+
7076
/// Describes the device half of a [`Disk`].
7177
#[derive(Clone, Debug)]
7278
pub enum StorageDevice {

bin/propolis-standalone/src/main.rs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -959,8 +959,19 @@ fn generate_smbios(params: SmbiosParams) -> anyhow::Result<smbios::TableBytes> {
959959
Ok(smb_tables.commit())
960960
}
961961

962-
fn generate_bootorder(config: &config::Config) -> anyhow::Result<fwcfg::Entry> {
963-
let names = config.main.boot_order.as_ref().unwrap();
962+
fn generate_bootorder(
963+
config: &config::Config,
964+
log: &slog::Logger,
965+
) -> anyhow::Result<Option<fwcfg::Entry>> {
966+
let Some(names) = config.main.boot_order.as_ref() else {
967+
return Ok(None);
968+
};
969+
970+
slog::info!(
971+
log,
972+
"Bootorder declared as {:?}",
973+
config.main.boot_order.as_ref()
974+
);
964975

965976
let mut order = fwcfg::formats::BootOrder::new();
966977
for name in names.iter() {
@@ -994,7 +1005,7 @@ fn generate_bootorder(config: &config::Config) -> anyhow::Result<fwcfg::Entry> {
9941005
}
9951006
}
9961007
}
997-
Ok(order.finish())
1008+
Ok(Some(order.finish()))
9981009
}
9991010

10001011
fn setup_instance(
@@ -1306,14 +1317,10 @@ fn setup_instance(
13061317

13071318
// It is "safe" to generate bootorder (if requested) now, given that PCI
13081319
// device configuration has been validated by preceding logic
1309-
if config.main.boot_order.is_some() {
1310-
fwcfg
1311-
.insert_named(
1312-
"bootorder",
1313-
generate_bootorder(&config)
1314-
.context("Failed to generate boot order")?,
1315-
)
1316-
.unwrap();
1320+
if let Some(boot_config) = generate_bootorder(&config, log)
1321+
.context("Failed to generate boot order")?
1322+
{
1323+
fwcfg.insert_named("bootorder", boot_config).unwrap();
13171324
}
13181325

13191326
fwcfg.attach(pio, &machine.acc_mem);

crates/propolis-api-types/src/instance_spec/v0.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ pub struct DeviceSpecV0 {
6161
#[serde(skip_serializing_if = "Option::is_none")]
6262
pub qemu_pvpanic: Option<components::devices::QemuPvpanic>,
6363

64+
// Same backwards compatibility reasoning as above.
65+
#[serde(default)]
66+
#[serde(skip_serializing_if = "Option::is_none")]
67+
pub boot_settings: Option<crate::BootSettings>,
68+
6469
#[cfg(feature = "falcon")]
6570
pub softnpu_pci_port: Option<components::devices::SoftNpuPciPort>,
6671
#[cfg(feature = "falcon")]

crates/propolis-api-types/src/lib.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ pub struct InstanceEnsureRequest {
5252
#[serde(default)]
5353
pub disks: Vec<DiskRequest>,
5454

55+
#[serde(default)]
56+
pub boot_settings: Option<BootSettings>,
57+
5558
pub migrate: Option<InstanceMigrateInitiateRequest>,
5659

5760
// base64 encoded cloud-init ISO
@@ -385,6 +388,18 @@ pub struct DiskAttachment {
385388
pub state: DiskAttachmentState,
386389
}
387390

391+
/// Boot settings for an instance.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
392+
pub struct BootSettings {
393+
/// The list of boot options, in the order supplied by the client.
pub order: Vec<BootOrderEntry>,
394+
}
395+
396+
/// An entry in a list of boot options.
///
/// propolis-server rejects an entry whose name does not match a disk
/// attached to the instance.
397+
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
398+
pub struct BootOrderEntry {
399+
/// The name of the device to attempt booting from.
400+
pub name: String,
401+
}
402+
388403
/// A stable index which is translated by Propolis
389404
/// into a PCI BDF, visible to the guest.
390405
#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)]

0 commit comments

Comments
 (0)