From 5c9e4c715294ea5e80ea84e5986d1aa79099f9bd Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Wed, 22 Oct 2025 11:07:57 +0000 Subject: [PATCH 1/8] virtio-balloon: Add free page reporting Free page reporting is a mechanism in which the guest notifies the host of pages which are not currently in use. This feature can only be configured at boot and, once enabled, reports continuously. With free page reporting, Firecracker will apply `MADV_DONTNEED` to the reported ranges. This allows the host to free up memory and reduce the RSS of the VM. With UFFD, this is delivered as a `UFFD_EVENT_REMOVE` event after the `MADV_DONTNEED` call. Signed-off-by: Jack Thomson --- src/vmm/src/builder.rs | 1 + src/vmm/src/device_manager/pci_mngr.rs | 4 +- src/vmm/src/device_manager/persist.rs | 4 +- src/vmm/src/devices/virtio/balloon/device.rs | 109 +++++++++++++++--- .../devices/virtio/balloon/event_handler.rs | 19 ++- src/vmm/src/devices/virtio/balloon/mod.rs | 16 ++- src/vmm/src/devices/virtio/balloon/persist.rs | 17 ++- src/vmm/src/persist.rs | 1 + src/vmm/src/resources.rs | 4 +- src/vmm/src/vmm_config/balloon.rs | 11 +- .../integration_tests/functional/test_api.py | 2 + 11 files changed, 157 insertions(+), 31 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 0f9ef70813e..afe385f3dc2 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1231,6 +1231,7 @@ pub(crate) mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_reporting: false, }; let mut cmdline = default_kernel_cmdline(); diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index bf5252d2d17..9b7173eec33 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -645,6 +645,7 @@ mod tests { amount_mib: 123, deflate_on_oom: false, stats_polling_interval_s: 1, + free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_cfg);
// Add a block device. @@ -728,7 +729,8 @@ mod tests { "balloon": {{ "amount_mib": 123, "deflate_on_oom": false, - "stats_polling_interval_s": 1 + "stats_polling_interval_s": 1, + "free_page_reporting": false }}, "drives": [ {{ diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 7616f252658..6741de7753b 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -669,6 +669,7 @@ mod tests { amount_mib: 123, deflate_on_oom: false, stats_polling_interval_s: 1, + free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_cfg); // Add a block device. @@ -748,7 +749,8 @@ mod tests { "balloon": {{ "amount_mib": 123, "deflate_on_oom": false, - "stats_polling_interval_s": 1 + "stats_polling_interval_s": 1, + "free_page_reporting": false }}, "drives": [ {{ diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index f0ecf77bc9e..62c9c7e0a2e 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -24,7 +24,7 @@ use super::{ VIRTIO_BALLOON_S_MEMTOT, VIRTIO_BALLOON_S_MINFLT, VIRTIO_BALLOON_S_SWAP_IN, VIRTIO_BALLOON_S_SWAP_OUT, }; -use crate::devices::virtio::balloon::BalloonError; +use crate::devices::virtio::balloon::{BalloonError, VIRTIO_BALLOON_F_FREE_PAGE_REPORTING}; use crate::devices::virtio::device::ActiveState; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BALLOON; @@ -83,6 +83,9 @@ pub struct BalloonConfig { pub deflate_on_oom: bool, /// Interval of time in seconds at which the balloon statistics are updated. pub stats_polling_interval_s: u16, + /// Free page reporting enabled + #[serde(default)] + pub free_page_reporting: bool, } /// BalloonStats holds statistics returned from the stats_queue. 
@@ -189,6 +192,7 @@ impl Balloon { amount_mib: u32, deflate_on_oom: bool, stats_polling_interval_s: u16, + free_page_reporting: bool, ) -> Result { let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; @@ -204,16 +208,26 @@ impl Balloon { EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, + EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, ]; let mut queues: Vec = BALLOON_QUEUE_SIZES.iter().map(|&s| Queue::new(s)).collect(); // The VirtIO specification states that the statistics queue should // not be present at all if the statistics are not enabled. + let mut dropped_queue_count = 0; if stats_polling_interval_s == 0 { - let _ = queues.remove(STATS_INDEX); + dropped_queue_count += 1; + } + + if free_page_reporting { + avail_features |= 1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING; + } else { + dropped_queue_count += 1; } + queues.truncate(queues.len() - dropped_queue_count); + let stats_timer = TimerFd::new_custom(ClockId::Monotonic, true, true).map_err(BalloonError::Timer)?; @@ -262,9 +276,20 @@ impl Balloon { self.trigger_stats_update() } + pub(crate) fn process_free_page_reporting_queue_event(&mut self) -> Result<(), BalloonError> { + self.queue_evts[self.free_page_reporting_idx()] + .read() + .map_err(BalloonError::EventFd)?; + self.process_free_page_reporting_queue() + } + pub(crate) fn process_inflate(&mut self) -> Result<(), BalloonError> { // This is safe since we checked in the event handler that the device is activated. - let mem = &self.device_state.active_state().unwrap().mem; + let mem = &self + .device_state + .active_state() + .ok_or(BalloonError::DeviceNotActive)? 
+ .mem; METRICS.inflate_count.inc(); let queue = &mut self.queues[INFLATE_INDEX]; @@ -406,6 +431,37 @@ impl Balloon { Ok(()) } + pub(crate) fn process_free_page_reporting_queue(&mut self) -> Result<(), BalloonError> { + let mem = &self.device_state.active_state().unwrap().mem; + + let idx = self.free_page_reporting_idx(); + let queue = &mut self.queues[idx]; + let mut needs_interrupt = false; + + while let Some(head) = queue.pop()? { + let head_index = head.index; + + let mut last_desc = Some(head); + while let Some(desc) = last_desc { + if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) { + error!("balloon: failed to remove range: {err:?}"); + } + last_desc = desc.next_descriptor(); + } + + queue.add_used(head.index, 0)?; + needs_interrupt = true; + } + + queue.advance_used_ring_idx(); + + if needs_interrupt { + self.signal_used_queue(idx)?; + } + + Ok(()) + } + pub(crate) fn signal_used_queue(&self, qidx: usize) -> Result<(), BalloonError> { self.interrupt_trigger() .trigger(VirtioInterruptType::Queue( @@ -427,6 +483,13 @@ impl Balloon { return Err(err); } + if self.free_page_reporting() + && let Err(BalloonError::InvalidAvailIdx(err)) = + self.process_free_page_reporting_queue() + { + return Err(err); + } + Ok(()) } @@ -466,6 +529,20 @@ impl Balloon { } } + pub fn free_page_reporting(&self) -> bool { + self.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) != 0 + } + + pub fn free_page_reporting_idx(&self) -> usize { + let mut idx = STATS_INDEX; + + if self.stats_polling_interval_s > 0 { + idx += 1; + } + + idx + } + /// Update the statistics polling interval. 
pub fn update_stats_polling_interval(&mut self, interval_s: u16) -> Result<(), BalloonError> { if self.stats_polling_interval_s == interval_s { @@ -529,6 +606,7 @@ impl Balloon { amount_mib: self.size_mb(), deflate_on_oom: self.deflate_on_oom(), stats_polling_interval_s: self.stats_polling_interval_s(), + free_page_reporting: self.free_page_reporting(), } } @@ -737,7 +815,7 @@ pub(crate) mod tests { // Test all feature combinations. for deflate_on_oom in [true, false].iter() { for stats_interval in [0, 1].iter() { - let mut balloon = Balloon::new(0, *deflate_on_oom, *stats_interval).unwrap(); + let mut balloon = Balloon::new(0, *deflate_on_oom, *stats_interval, false).unwrap(); assert_eq!(balloon.device_type(), VIRTIO_ID_BALLOON); let features: u64 = (1u64 << VIRTIO_F_VERSION_1) @@ -764,12 +842,13 @@ pub(crate) mod tests { #[test] fn test_virtio_read_config() { - let balloon = Balloon::new(0x10, true, 0).unwrap(); + let balloon = Balloon::new(0x10, true, 0, false).unwrap(); let cfg = BalloonConfig { amount_mib: 16, deflate_on_oom: true, stats_polling_interval_s: 0, + free_page_reporting: false, }; assert_eq!(balloon.config(), cfg); @@ -798,7 +877,7 @@ pub(crate) mod tests { #[test] fn test_virtio_write_config() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; @@ -824,7 +903,7 @@ pub(crate) mod tests { #[test] fn test_invalid_request() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); // Only initialize the inflate queue to demonstrate invalid request handling. 
@@ -885,7 +964,7 @@ pub(crate) mod tests { #[test] fn test_inflate() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -957,7 +1036,7 @@ pub(crate) mod tests { #[test] fn test_deflate() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let defq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1007,7 +1086,7 @@ pub(crate) mod tests { #[test] fn test_stats() { - let mut balloon = Balloon::new(0, true, 1).unwrap(); + let mut balloon = Balloon::new(0, true, 1, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let statsq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1099,7 +1178,7 @@ pub(crate) mod tests { #[test] fn test_process_balloon_queues() { - let mut balloon = Balloon::new(0x10, true, 0).unwrap(); + let mut balloon = Balloon::new(0x10, true, 0, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1114,7 +1193,7 @@ pub(crate) mod tests { #[test] fn test_update_stats_interval() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); @@ -1127,7 +1206,7 @@ pub(crate) mod tests { ); balloon.update_stats_polling_interval(0).unwrap(); - let mut balloon = Balloon::new(0, true, 1).unwrap(); + let mut balloon = Balloon::new(0, true, 1, false).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); @@ -1145,14 +1224,14 @@ pub(crate) mod tests { #[test] fn 
test_cannot_update_inactive_device() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); // Assert that we can't update an inactive device. balloon.update_size(1).unwrap_err(); } #[test] fn test_num_pages() { - let mut balloon = Balloon::new(0, true, 0).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false).unwrap(); // Switch the state to active. balloon.device_state = DeviceState::Activated(ActiveState { mem: single_region_mem(32 << 20), diff --git a/src/vmm/src/devices/virtio/balloon/event_handler.rs b/src/vmm/src/devices/virtio/balloon/event_handler.rs index 6fdd00c434c..1b057c588cd 100644 --- a/src/vmm/src/devices/virtio/balloon/event_handler.rs +++ b/src/vmm/src/devices/virtio/balloon/event_handler.rs @@ -15,6 +15,7 @@ impl Balloon { const PROCESS_VIRTQ_DEFLATE: u32 = 2; const PROCESS_VIRTQ_STATS: u32 = 3; const PROCESS_STATS_TIMER: u32 = 4; + const PROCESS_VIRTQ_FREE_PAGE_REPORTING: u32 = 6; fn register_runtime_events(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( @@ -47,6 +48,19 @@ impl Balloon { error!("Failed to register stats timerfd event: {}", err); } } + + if self.free_page_reporting() + && let Err(err) = ops.add(Events::with_data( + &self.queue_evts[self.free_page_reporting_idx()], + Self::PROCESS_VIRTQ_FREE_PAGE_REPORTING, + EventSet::IN, + )) + { + error!( + "Failed to register free page reporting queue event: {}", + err + ); + } } fn register_activate_event(&self, ops: &mut EventOps) { @@ -103,6 +117,9 @@ impl MutEventSubscriber for Balloon { Self::PROCESS_STATS_TIMER => self .process_stats_timer_event() .unwrap_or_else(report_balloon_event_fail), + Self::PROCESS_VIRTQ_FREE_PAGE_REPORTING => self + .process_free_page_reporting_queue_event() + .unwrap_or_else(report_balloon_event_fail), _ => { warn!("Balloon: Spurious event received: {:?}", source); } @@ -142,7 +159,7 @@ pub mod tests { #[test] fn test_event_handler() { let mut event_manager = 
EventManager::new().unwrap(); - let mut balloon = Balloon::new(0, true, 10).unwrap(); + let mut balloon = Balloon::new(0, true, 10, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); diff --git a/src/vmm/src/devices/virtio/balloon/mod.rs b/src/vmm/src/devices/virtio/balloon/mod.rs index 33020e0ddec..a1c2b304afc 100644 --- a/src/vmm/src/devices/virtio/balloon/mod.rs +++ b/src/vmm/src/devices/virtio/balloon/mod.rs @@ -25,14 +25,11 @@ pub const BALLOON_DEV_ID: &str = "balloon"; /// The size of the config space. pub const BALLOON_CONFIG_SPACE_SIZE: usize = 8; /// Number of virtio queues. -pub const BALLOON_NUM_QUEUES: usize = 3; +pub const BALLOON_NUM_QUEUES: usize = 4; /// Virtio queue sizes, in number of descriptor chain heads. -// There are 3 queues for a virtio device (in this order): RX, TX, Event -pub const BALLOON_QUEUE_SIZES: [u16; BALLOON_NUM_QUEUES] = [ - FIRECRACKER_MAX_QUEUE_SIZE, - FIRECRACKER_MAX_QUEUE_SIZE, - FIRECRACKER_MAX_QUEUE_SIZE, -]; +// There are 4 queues for a virtio device (in this order): RX, TX, Event, Reporting +pub const BALLOON_QUEUE_SIZES: [u16; BALLOON_NUM_QUEUES] = + [FIRECRACKER_MAX_QUEUE_SIZE; BALLOON_NUM_QUEUES]; // Number of 4K pages in a MiB. pub const MIB_TO_4K_PAGES: u32 = 256; /// The maximum number of pages that can be received in a single descriptor. @@ -42,16 +39,17 @@ pub const MAX_PAGES_IN_DESC: usize = 256; pub const MAX_PAGE_COMPACT_BUFFER: usize = 2048; /// The addresses given by the driver are divided by 4096. pub const VIRTIO_BALLOON_PFN_SHIFT: u32 = 12; -/// The index of the deflate queue from Balloon device queues/queues_evts vector. +/// The index of the inflate queue from Balloon device queues/queues_evts vector. pub const INFLATE_INDEX: usize = 0; /// The index of the deflate queue from Balloon device queues/queues_evts vector. 
pub const DEFLATE_INDEX: usize = 1; -/// The index of the deflate queue from Balloon device queues/queues_evts vector. +/// The index of the stats queue from Balloon device queues/queues_evts vector. pub const STATS_INDEX: usize = 2; // The feature bitmap for virtio balloon. const VIRTIO_BALLOON_F_STATS_VQ: u32 = 1; // Enable statistics. const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u32 = 2; // Deflate balloon on OOM. +const VIRTIO_BALLOON_F_FREE_PAGE_REPORTING: u32 = 5; // Enable free page reportin // The statistics tags. const VIRTIO_BALLOON_S_SWAP_IN: u16 = 0; diff --git a/src/vmm/src/devices/virtio/balloon/persist.rs b/src/vmm/src/devices/virtio/balloon/persist.rs index e92356c394e..6ce9bf7fccc 100644 --- a/src/vmm/src/devices/virtio/balloon/persist.rs +++ b/src/vmm/src/devices/virtio/balloon/persist.rs @@ -119,9 +119,17 @@ impl Persist<'_> for Balloon { constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { + let free_page_reporting = + state.virtio_state.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) != 0; + // We can safely create the balloon with arbitrary flags and // num_pages because we will overwrite them after. - let mut balloon = Balloon::new(0, false, state.stats_polling_interval_s)?; + let mut balloon = Balloon::new( + 0, + false, + state.stats_polling_interval_s, + free_page_reporting, + )?; let mut num_queues = BALLOON_NUM_QUEUES; // As per the virtio 1.1 specification, the statistics queue @@ -129,6 +137,11 @@ impl Persist<'_> for Balloon { if state.stats_polling_interval_s == 0 { num_queues -= 1; } + + if !free_page_reporting { + num_queues -= 1; + } + balloon.queues = state .virtio_state .build_queues_checked( @@ -178,7 +191,7 @@ mod tests { let mut mem = vec![0; 4096]; // Create and save the balloon device. 
- let balloon = Balloon::new(0x42, false, 2).unwrap(); + let balloon = Balloon::new(0x42, false, 2, false).unwrap(); Snapshot::new(balloon.save()) .save(&mut mem.as_mut_slice()) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index ee76bf6800b..a704c2b6062 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -600,6 +600,7 @@ mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_config); diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 53f1185115a..85a9bb672fb 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -1476,6 +1476,7 @@ mod tests { amount_mib: 100, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_reporting: false, }) .unwrap(); aux_vm_config.mem_size_mib = Some(90); @@ -1514,6 +1515,7 @@ mod tests { amount_mib: 100, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_reporting: false, }; assert!(vm_resources.balloon.get().is_none()); vm_resources @@ -1551,7 +1553,7 @@ mod tests { .unwrap(); let err = vm_resources .update_from_restored_device(SharedDeviceType::Balloon(Arc::new(Mutex::new( - Balloon::new(128, false, 0).unwrap(), + Balloon::new(128, false, 0, false).unwrap(), )))) .unwrap_err(); assert!( diff --git a/src/vmm/src/vmm_config/balloon.rs b/src/vmm/src/vmm_config/balloon.rs index e56430d6dc6..2a306caa9b2 100644 --- a/src/vmm/src/vmm_config/balloon.rs +++ b/src/vmm/src/vmm_config/balloon.rs @@ -36,6 +36,9 @@ pub struct BalloonDeviceConfig { /// Interval in seconds between refreshing statistics. 
#[serde(default)] pub stats_polling_interval_s: u16, + /// Free page reporting enabled + #[serde(default)] + pub free_page_reporting: bool, } impl From for BalloonDeviceConfig { @@ -44,6 +47,7 @@ impl From for BalloonDeviceConfig { amount_mib: state.amount_mib, deflate_on_oom: state.deflate_on_oom, stats_polling_interval_s: state.stats_polling_interval_s, + free_page_reporting: state.free_page_reporting, } } } @@ -88,6 +92,7 @@ impl BalloonBuilder { cfg.amount_mib, cfg.deflate_on_oom, cfg.stats_polling_interval_s, + cfg.free_page_reporting, )?))); Ok(()) @@ -130,6 +135,7 @@ pub(crate) mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_reporting: false, } } @@ -140,6 +146,7 @@ pub(crate) mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_reporting: false, }; assert_eq!(default_balloon_config, balloon_config); let mut builder = BalloonBuilder::new(); @@ -161,12 +168,14 @@ pub(crate) mod tests { amount_mib: 5, deflate_on_oom: false, stats_polling_interval_s: 3, + free_page_reporting: false, }; let actual_balloon_config = BalloonDeviceConfig::from(BalloonConfig { amount_mib: 5, deflate_on_oom: false, stats_polling_interval_s: 3, + free_page_reporting: false, }); assert_eq!(expected_balloon_config, actual_balloon_config); @@ -175,7 +184,7 @@ pub(crate) mod tests { #[test] fn test_set_device() { let mut builder = BalloonBuilder::new(); - let balloon = Balloon::new(0, true, 0).unwrap(); + let balloon = Balloon::new(0, true, 0, false).unwrap(); builder.set_device(Arc::new(Mutex::new(balloon))); assert!(builder.inner.is_some()); } diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 7dab0e14e6d..f2e6334b7a5 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -1167,6 +1167,7 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano): "amount_mib": 1, 
"deflate_on_oom": True, "stats_polling_interval_s": 0, + "free_page_reporting": False, } # Add a vsock device. @@ -1293,6 +1294,7 @@ def test_get_full_config(uvm_plain): "amount_mib": 1, "deflate_on_oom": True, "stats_polling_interval_s": 0, + "free_page_reporting": False, } # Add a vsock device. From 1ca36f24147a49cfd7a8f9f4d2c4b27d2940054d Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Wed, 22 Oct 2025 13:46:42 +0000 Subject: [PATCH 2/8] virtio-balloon: Add free page hinting Free page hinting is a mechanism which allows the guest driver to report ranges of pages to the host device. A "hinting" run is triggered by the device issuing a new command id in the config space; after the id is updated, the driver hints unused ranges to the host. Once the driver has exhausted all free ranges it notifies the device that the run has completed. The device can then issue another command allowing the guest to reclaim these pages. To add hinting support to the Firecracker balloon device, we offer three points to manage the device: the first to start a run, the second to monitor the status, and a final one to issue the command that allows the guest to reclaim pages. Note that there is a potential condition in the Linux driver which would allow a range to be reclaimed in an OOM scenario before we remove the range.
Signed-off-by: Jack Thomson --- src/vmm/src/builder.rs | 1 + src/vmm/src/device_manager/pci_mngr.rs | 2 + src/vmm/src/device_manager/persist.rs | 2 + src/vmm/src/devices/virtio/balloon/device.rs | 281 ++++++++++++++++-- .../devices/virtio/balloon/event_handler.rs | 16 +- src/vmm/src/devices/virtio/balloon/mod.rs | 14 +- src/vmm/src/devices/virtio/balloon/persist.rs | 30 +- src/vmm/src/persist.rs | 1 + src/vmm/src/resources.rs | 4 +- src/vmm/src/vmm_config/balloon.rs | 11 +- .../integration_tests/functional/test_api.py | 2 + 11 files changed, 330 insertions(+), 34 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index afe385f3dc2..6f3ede62c9b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1231,6 +1231,7 @@ pub(crate) mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, }; diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index 9b7173eec33..42079f54373 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -645,6 +645,7 @@ mod tests { amount_mib: 123, deflate_on_oom: false, stats_polling_interval_s: 1, + free_page_hinting: false, free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_cfg); @@ -730,6 +731,7 @@ mod tests { "amount_mib": 123, "deflate_on_oom": false, "stats_polling_interval_s": 1, + "free_page_hinting": false, "free_page_reporting": false }}, "drives": [ diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 6741de7753b..b294b4d0a85 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -669,6 +669,7 @@ mod tests { amount_mib: 123, deflate_on_oom: false, stats_polling_interval_s: 1, + free_page_hinting: false, free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, 
balloon_cfg); @@ -750,6 +751,7 @@ mod tests { "amount_mib": 123, "deflate_on_oom": false, "stats_polling_interval_s": 1, + "free_page_hinting": false, "free_page_reporting": false }}, "drives": [ diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 62c9c7e0a2e..ca72791ae9a 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -5,8 +5,8 @@ use std::ops::Deref; use std::sync::Arc; use std::time::Duration; -use log::{error, info}; -use serde::Serialize; +use log::{error, info, warn}; +use serde::{Deserialize, Serialize}; use timerfd::{ClockId, SetTimeFlags, TimerFd, TimerState}; use vmm_sys_util::eventfd::EventFd; @@ -24,7 +24,10 @@ use super::{ VIRTIO_BALLOON_S_MEMTOT, VIRTIO_BALLOON_S_MINFLT, VIRTIO_BALLOON_S_SWAP_IN, VIRTIO_BALLOON_S_SWAP_OUT, }; -use crate::devices::virtio::balloon::{BalloonError, VIRTIO_BALLOON_F_FREE_PAGE_REPORTING}; +use crate::devices::virtio::balloon::{ + BalloonError, FREE_PAGE_HINT_DONE, FREE_PAGE_HINT_STOP, VIRTIO_BALLOON_F_FREE_PAGE_HINTING, + VIRTIO_BALLOON_F_FREE_PAGE_REPORTING, +}; use crate::devices::virtio::device::ActiveState; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BALLOON; @@ -57,11 +60,47 @@ fn pages_to_mib(amount_pages: u32) -> u32 { pub(crate) struct ConfigSpace { pub num_pages: u32, pub actual_pages: u32, + pub free_page_hint_cmd_id: u32, } // SAFETY: Safe because ConfigSpace only contains plain data. unsafe impl ByteValued for ConfigSpace {} +/// Holds state of the free page hinting run +#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)] +pub(crate) struct HintingState { + /// The command requested by us. Set to STOP by default. + pub host_cmd: u32, + /// The last command supplied by guest. + pub last_cmd_id: u32, + /// The command supplied by guest. 
+ pub guest_cmd: Option, + /// Whether or not to automatically ack on STOP. + pub acknowledge_on_finish: bool, +} + +/// By default hinting will ack on stop +fn default_ack_on_stop() -> bool { + true +} + +/// Command recieved from the API to start a hinting run +#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Deserialize)] +pub struct StartHintingCmd { + /// If we should automatically acknowledge end of the run after stop. + #[serde(default = "default_ack_on_stop")] + pub acknowledge_on_stop: bool, +} + +/// Returned to the API for get hinting status +#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Serialize)] +pub struct HintingStatus { + /// The command requested by us. Set to STOP by default. + pub host_cmd: u32, + /// The command supplied by guest. + pub guest_cmd: Option, +} + // This structure needs the `packed` attribute, otherwise Rust will assume // the size to be 16 bytes. #[derive(Copy, Clone, Debug, Default)] @@ -83,6 +122,9 @@ pub struct BalloonConfig { pub deflate_on_oom: bool, /// Interval of time in seconds at which the balloon statistics are updated. pub stats_polling_interval_s: u16, + /// Free page hinting enabled + #[serde(default)] + pub free_page_hinting: bool, /// Free page reporting enabled #[serde(default)] pub free_page_reporting: bool, @@ -184,6 +226,9 @@ pub struct Balloon { pub(crate) latest_stats: BalloonStats, // A buffer used as pfn accumulator during descriptor processing. 
pub(crate) pfn_buffer: [u32; MAX_PAGE_COMPACT_BUFFER], + + // Holds state for free page hinting + pub(crate) hinting_state: HintingState, } impl Balloon { @@ -192,6 +237,7 @@ impl Balloon { amount_mib: u32, deflate_on_oom: bool, stats_polling_interval_s: u16, + free_page_hinting: bool, free_page_reporting: bool, ) -> Result { let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; @@ -209,6 +255,7 @@ impl Balloon { EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, + EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, ]; let mut queues: Vec = BALLOON_QUEUE_SIZES.iter().map(|&s| Queue::new(s)).collect(); @@ -220,6 +267,12 @@ impl Balloon { dropped_queue_count += 1; } + if free_page_hinting { + avail_features |= 1u64 << VIRTIO_BALLOON_F_FREE_PAGE_HINTING; + } else { + dropped_queue_count += 1; + } + if free_page_reporting { avail_features |= 1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING; } else { @@ -237,6 +290,7 @@ impl Balloon { config_space: ConfigSpace { num_pages: mib_to_pages(amount_mib)?, actual_pages: 0, + free_page_hint_cmd_id: FREE_PAGE_HINT_STOP, }, queue_evts, queues, @@ -247,6 +301,7 @@ impl Balloon { stats_desc_index: None, latest_stats: BalloonStats::default(), pfn_buffer: [0u32; MAX_PAGE_COMPACT_BUFFER], + hinting_state: Default::default(), }) } @@ -276,6 +331,13 @@ impl Balloon { self.trigger_stats_update() } + pub(crate) fn process_free_page_hinting_queue_event(&mut self) -> Result<(), BalloonError> { + self.queue_evts[self.free_page_hinting_idx()] + .read() + .map_err(BalloonError::EventFd)?; + self.process_free_page_hinting_queue() + } + pub(crate) fn process_free_page_reporting_queue_event(&mut self) -> Result<(), BalloonError> { self.queue_evts[self.free_page_reporting_idx()] .read() @@ -431,8 +493,90 @@ impl Balloon { Ok(()) } + pub(crate) fn process_free_page_hinting_queue(&mut self) -> 
Result<(), BalloonError> { + let mem = &self + .device_state + .active_state() + .ok_or(BalloonError::DeviceNotActive)? + .mem; + + let idx = self.free_page_hinting_idx(); + let queue = &mut self.queues[idx]; + let host_cmd = self.hinting_state.host_cmd; + let mut needs_interrupt = false; + let mut complete = false; + + while let Some(head) = queue.pop()? { + let head_index = head.index; + + let mut last_desc = Some(head); + while let Some(desc) = last_desc { + last_desc = desc.next_descriptor(); + + // Updated cmd_ids are always of length 4 + if desc.len == 4 { + complete = false; + + let cmd = mem + .read_obj::(desc.addr) + .map_err(|_| BalloonError::MalformedDescriptor)?; + self.hinting_state.guest_cmd = Some(cmd); + if cmd == FREE_PAGE_HINT_STOP { + complete = true; + } + + // We don't expect this from the driver, but lets treat as a stop + if cmd == FREE_PAGE_HINT_DONE { + warn!("balloon hinting: Unexpected cmd from guest: {cmd}"); + complete = true; + } + + continue; + } + + // If we've requested done we have to discard any in-flight hints + if host_cmd == FREE_PAGE_HINT_DONE || host_cmd == FREE_PAGE_HINT_STOP { + continue; + } + + let Some(chain_cmd) = self.hinting_state.guest_cmd else { + warn!("balloon hinting: received range with no command id."); + continue; + }; + + if chain_cmd != host_cmd { + info!("balloon hinting: Received chain from previous command ignoring."); + continue; + } + + if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) { + error!("balloon hinting: failed to remove range: {err:?}"); + } + } + + queue.add_used(head.index, 0)?; + needs_interrupt = true; + } + + queue.advance_used_ring_idx(); + + if needs_interrupt { + self.signal_used_queue(idx)?; + } + + if complete && self.hinting_state.acknowledge_on_finish { + self.update_free_page_hint_cmd(FREE_PAGE_HINT_DONE); + } + + Ok(()) + } + pub(crate) fn process_free_page_reporting_queue(&mut self) -> Result<(), BalloonError> { - let mem = 
&self.device_state.active_state().unwrap().mem; + let mem = &self + .device_state + .active_state() + .ok_or(BalloonError::DeviceNotActive)? + .mem; let idx = self.free_page_reporting_idx(); let queue = &mut self.queues[idx]; @@ -483,6 +627,12 @@ impl Balloon { return Err(err); } + if self.free_page_hinting() + && let Err(BalloonError::InvalidAvailIdx(err)) = self.process_free_page_hinting_queue() + { + return Err(err); + } + if self.free_page_reporting() && let Err(BalloonError::InvalidAvailIdx(err)) = self.process_free_page_reporting_queue() @@ -529,6 +679,20 @@ impl Balloon { } } + pub fn free_page_hinting(&self) -> bool { + self.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_HINTING) != 0 + } + + pub fn free_page_hinting_idx(&self) -> usize { + let mut idx = STATS_INDEX; + + if self.stats_polling_interval_s > 0 { + idx += 1; + } + + idx + } + pub fn free_page_reporting(&self) -> bool { self.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) != 0 } @@ -540,6 +704,10 @@ impl Balloon { idx += 1; } + if self.free_page_hinting() { + idx += 1; + } + idx } @@ -600,12 +768,65 @@ impl Balloon { } } + /// Update the free page hinting cmd + pub fn update_free_page_hint_cmd(&mut self, cmd_id: u32) -> Result<(), BalloonError> { + if !self.is_activated() { + return Err(BalloonError::DeviceNotActive); + } + + self.hinting_state.host_cmd = cmd_id; + self.config_space.free_page_hint_cmd_id = cmd_id; + self.interrupt_trigger() + .trigger(VirtioInterruptType::Config) + .map_err(BalloonError::InterruptError) + } + + /// Starts a hinting run by setting the cmd_id to a new value. 
+ pub(crate) fn start_hinting(&mut self, cmd: StartHintingCmd) -> Result<(), BalloonError> { + if !self.free_page_hinting() { + return Err(BalloonError::HintingNotEnabled); + } + + let mut cmd_id = self.hinting_state.last_cmd_id.wrapping_add(1); + // 0 and 1 are reserved and cannot be used to start a hinting run + if cmd_id <= 1 { + cmd_id = 2; + } + + self.hinting_state.acknowledge_on_finish = cmd.acknowledge_on_stop; + self.hinting_state.last_cmd_id = cmd_id; + self.update_free_page_hint_cmd(cmd_id) + } + + /// Return the status of the hinting including the last command we sent to the driver + /// and the last cmd sent from the driver + pub(crate) fn get_hinting_status(&self) -> Result { + if !self.free_page_hinting() { + return Err(BalloonError::HintingNotEnabled); + } + + Ok(HintingStatus { + host_cmd: self.hinting_state.host_cmd, + guest_cmd: self.hinting_state.guest_cmd, + }) + } + + /// Stops the hinting run allowing the guest to reclaim hinted pages + pub(crate) fn stop_hinting(&mut self) -> Result<(), BalloonError> { + if !self.free_page_hinting() { + Err(BalloonError::HintingNotEnabled) + } else { + self.update_free_page_hint_cmd(FREE_PAGE_HINT_DONE) + } + } + /// Return the config of the balloon device. pub fn config(&self) -> BalloonConfig { BalloonConfig { amount_mib: self.size_mb(), deflate_on_oom: self.deflate_on_oom(), stats_polling_interval_s: self.stats_polling_interval_s(), + free_page_hinting: self.free_page_hinting(), free_page_reporting: self.free_page_reporting(), } } @@ -815,7 +1036,8 @@ pub(crate) mod tests { // Test all feature combinations. 
for deflate_on_oom in [true, false].iter() { for stats_interval in [0, 1].iter() { - let mut balloon = Balloon::new(0, *deflate_on_oom, *stats_interval, false).unwrap(); + let mut balloon = + Balloon::new(0, *deflate_on_oom, *stats_interval, false, false).unwrap(); assert_eq!(balloon.device_type(), VIRTIO_ID_BALLOON); let features: u64 = (1u64 << VIRTIO_F_VERSION_1) @@ -842,12 +1064,13 @@ pub(crate) mod tests { #[test] fn test_virtio_read_config() { - let balloon = Balloon::new(0x10, true, 0, false).unwrap(); + let balloon = Balloon::new(0x10, true, 0, false, false).unwrap(); let cfg = BalloonConfig { amount_mib: 16, deflate_on_oom: true, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, }; assert_eq!(balloon.config(), cfg); @@ -858,13 +1081,15 @@ pub(crate) mod tests { // The config space is little endian. // 0x10 MB in the constructor corresponds to 0x1000 pages in the // config space. - let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = - [0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [ + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; assert_eq!(actual_config_space, expected_config_space); // Invalid read. 
- let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = - [0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf]; + let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [ + 0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf, 0x00, 0x00, 0x00, 0x00, + ]; actual_config_space = expected_config_space; balloon.read_config( BALLOON_CONFIG_SPACE_SIZE as u64 + 1, @@ -877,10 +1102,11 @@ pub(crate) mod tests { #[test] fn test_virtio_write_config() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); - let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = - [0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [ + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; balloon.write_config(0, &expected_config_space); let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE]; @@ -888,7 +1114,9 @@ pub(crate) mod tests { assert_eq!(actual_config_space, expected_config_space); // Invalid write. - let new_config_space = [0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf]; + let new_config_space = [ + 0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf, 0x00, 0x00, 0x00, 0x00, + ]; balloon.write_config(5, &new_config_space); // Make sure nothing got written. balloon.read_config(0, &mut actual_config_space); @@ -903,7 +1131,7 @@ pub(crate) mod tests { #[test] fn test_invalid_request() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); // Only initialize the inflate queue to demonstrate invalid request handling. 
@@ -964,7 +1192,7 @@ pub(crate) mod tests { #[test] fn test_inflate() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1036,7 +1264,7 @@ pub(crate) mod tests { #[test] fn test_deflate() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let defq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1086,7 +1314,7 @@ pub(crate) mod tests { #[test] fn test_stats() { - let mut balloon = Balloon::new(0, true, 1, false).unwrap(); + let mut balloon = Balloon::new(0, true, 1, false, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let statsq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1178,7 +1406,7 @@ pub(crate) mod tests { #[test] fn test_process_balloon_queues() { - let mut balloon = Balloon::new(0x10, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0x10, true, 0, false, false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1193,7 +1421,7 @@ pub(crate) mod tests { #[test] fn test_update_stats_interval() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); @@ -1206,7 +1434,7 @@ pub(crate) mod tests { ); balloon.update_stats_polling_interval(0).unwrap(); - let mut balloon = Balloon::new(0, true, 1, false).unwrap(); + let mut balloon = Balloon::new(0, true, 1, false, false).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, 
q.create_queue()); @@ -1224,14 +1452,14 @@ pub(crate) mod tests { #[test] fn test_cannot_update_inactive_device() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); // Assert that we can't update an inactive device. balloon.update_size(1).unwrap_err(); } #[test] fn test_num_pages() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); // Switch the state to active. balloon.device_state = DeviceState::Activated(ActiveState { mem: single_region_mem(32 << 20), @@ -1249,13 +1477,16 @@ pub(crate) mod tests { let mut actual_config = vec![0; BALLOON_CONFIG_SPACE_SIZE]; balloon.read_config(0, &mut actual_config); - assert_eq!(actual_config, vec![0x0, 0x10, 0x0, 0x0, 0x34, 0x12, 0, 0]); + assert_eq!( + actual_config, + vec![0x0, 0x10, 0x0, 0x0, 0x34, 0x12, 0, 0, 0, 0, 0, 0] + ); assert_eq!(balloon.num_pages(), 0x1000); assert_eq!(balloon.actual_pages(), 0x1234); assert_eq!(balloon.size_mb(), 16); // Update fields through the config space. 
- let expected_config = vec![0x44, 0x33, 0x22, 0x11, 0x78, 0x56, 0x34, 0x12]; + let expected_config = vec![0x44, 0x33, 0x22, 0x11, 0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0]; balloon.write_config(0, &expected_config); assert_eq!(balloon.num_pages(), 0x1122_3344); assert_eq!(balloon.actual_pages(), 0x1234_5678); diff --git a/src/vmm/src/devices/virtio/balloon/event_handler.rs b/src/vmm/src/devices/virtio/balloon/event_handler.rs index 1b057c588cd..a89cfe8477d 100644 --- a/src/vmm/src/devices/virtio/balloon/event_handler.rs +++ b/src/vmm/src/devices/virtio/balloon/event_handler.rs @@ -15,6 +15,7 @@ impl Balloon { const PROCESS_VIRTQ_DEFLATE: u32 = 2; const PROCESS_VIRTQ_STATS: u32 = 3; const PROCESS_STATS_TIMER: u32 = 4; + const PROCESS_VIRTQ_FREE_PAGE_HINTING: u32 = 5; const PROCESS_VIRTQ_FREE_PAGE_REPORTING: u32 = 6; fn register_runtime_events(&self, ops: &mut EventOps) { @@ -49,6 +50,16 @@ impl Balloon { } } + if self.free_page_hinting() + && let Err(err) = ops.add(Events::with_data( + &self.queue_evts[self.free_page_hinting_idx()], + Self::PROCESS_VIRTQ_FREE_PAGE_HINTING, + EventSet::IN, + )) + { + error!("Failed to register free page hinting queue event: {}", err); + } + if self.free_page_reporting() && let Err(err) = ops.add(Events::with_data( &self.queue_evts[self.free_page_reporting_idx()], @@ -117,6 +128,9 @@ impl MutEventSubscriber for Balloon { Self::PROCESS_STATS_TIMER => self .process_stats_timer_event() .unwrap_or_else(report_balloon_event_fail), + Self::PROCESS_VIRTQ_FREE_PAGE_HINTING => self + .process_free_page_hinting_queue_event() + .unwrap_or_else(report_balloon_event_fail), Self::PROCESS_VIRTQ_FREE_PAGE_REPORTING => self .process_free_page_reporting_queue_event() .unwrap_or_else(report_balloon_event_fail), @@ -159,7 +173,7 @@ pub mod tests { #[test] fn test_event_handler() { let mut event_manager = EventManager::new().unwrap(); - let mut balloon = Balloon::new(0, true, 10, false).unwrap(); + let mut balloon = Balloon::new(0, true, 10, false, 
false).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); diff --git a/src/vmm/src/devices/virtio/balloon/mod.rs b/src/vmm/src/devices/virtio/balloon/mod.rs index a1c2b304afc..dda2ccef5d2 100644 --- a/src/vmm/src/devices/virtio/balloon/mod.rs +++ b/src/vmm/src/devices/virtio/balloon/mod.rs @@ -23,11 +23,11 @@ use crate::vstate::interrupts::InterruptError; /// Because Balloon is unique per-vm, this ID can be hardcoded. pub const BALLOON_DEV_ID: &str = "balloon"; /// The size of the config space. -pub const BALLOON_CONFIG_SPACE_SIZE: usize = 8; +pub const BALLOON_CONFIG_SPACE_SIZE: usize = 12; /// Number of virtio queues. -pub const BALLOON_NUM_QUEUES: usize = 4; +pub const BALLOON_NUM_QUEUES: usize = 5; /// Virtio queue sizes, in number of descriptor chain heads. -// There are 4 queues for a virtio device (in this order): RX, TX, Event, Reporting +// There are 5 queues for a virtio device (in this order): RX, TX, Event, Hinting, Reporting pub const BALLOON_QUEUE_SIZES: [u16; BALLOON_NUM_QUEUES] = [FIRECRACKER_MAX_QUEUE_SIZE; BALLOON_NUM_QUEUES]; // Number of 4K pages in a MiB. @@ -46,9 +46,15 @@ pub const DEFLATE_INDEX: usize = 1; /// The index of the stats queue from Balloon device queues/queues_evts vector. pub const STATS_INDEX: usize = 2; +/// Command used in free page hinting to indicate the guest has finished +pub const FREE_PAGE_HINT_STOP: u32 = 0; +/// Command used in free page hinting to indicate to the guest to release pages +pub const FREE_PAGE_HINT_DONE: u32 = 1; + // The feature bitmap for virtio balloon. const VIRTIO_BALLOON_F_STATS_VQ: u32 = 1; // Enable statistics. const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u32 = 2; // Deflate balloon on OOM. +const VIRTIO_BALLOON_F_FREE_PAGE_HINTING: u32 = 3; // Enable free page hinting const VIRTIO_BALLOON_F_FREE_PAGE_REPORTING: u32 = 5; // Enable free page reportin // The statistics tags.
@@ -68,6 +74,8 @@ const VIRTIO_BALLOON_S_HTLB_PGFAIL: u16 = 9; pub enum BalloonError { /// Device not activated yet. DeviceNotActive, + /// Attempting to use hinting when not enabled + HintingNotEnabled, /// EventFd error: {0} EventFd(std::io::Error), /// Received error while sending an interrupt: {0} diff --git a/src/vmm/src/devices/virtio/balloon/persist.rs b/src/vmm/src/devices/virtio/balloon/persist.rs index 6ce9bf7fccc..1e5a1409b14 100644 --- a/src/vmm/src/devices/virtio/balloon/persist.rs +++ b/src/vmm/src/devices/virtio/balloon/persist.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use timerfd::{SetTimeFlags, TimerState}; use super::*; -use crate::devices::virtio::balloon::device::{BalloonStats, ConfigSpace}; +use crate::devices::virtio::balloon::device::{BalloonStats, ConfigSpace, HintingState}; use crate::devices::virtio::device::{ActiveState, DeviceState}; use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BALLOON; use crate::devices::virtio::persist::VirtioDeviceState; @@ -87,6 +87,7 @@ pub struct BalloonState { stats_desc_index: Option, latest_stats: BalloonStatsState, config_space: BalloonConfigSpaceState, + hinting_state: HintingState, pub virtio_state: VirtioDeviceState, } @@ -107,6 +108,7 @@ impl Persist<'_> for Balloon { stats_polling_interval_s: self.stats_polling_interval_s, stats_desc_index: self.stats_desc_index, latest_stats: BalloonStatsState::from_stats(&self.latest_stats), + hinting_state: self.hinting_state, config_space: BalloonConfigSpaceState { num_pages: self.config_space.num_pages, actual_pages: self.config_space.actual_pages, @@ -119,6 +121,9 @@ impl Persist<'_> for Balloon { constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { + let free_page_hinting = + state.virtio_state.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_HINTING) != 0; + let free_page_reporting = state.virtio_state.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) != 0; @@ -128,6 +133,7 @@ impl 
Persist<'_> for Balloon { 0, false, state.stats_polling_interval_s, + free_page_hinting, free_page_reporting, )?; @@ -138,6 +144,10 @@ impl Persist<'_> for Balloon { num_queues -= 1; } + if !free_page_hinting { + num_queues -= 1; + } + if !free_page_reporting { num_queues -= 1; } @@ -157,7 +167,10 @@ impl Persist<'_> for Balloon { balloon.config_space = ConfigSpace { num_pages: state.config_space.num_pages, actual_pages: state.config_space.actual_pages, + // On restore allow the guest to reclaim pages + free_page_hint_cmd_id: FREE_PAGE_HINT_DONE, }; + balloon.hinting_state = state.hinting_state; if state.virtio_state.activated && balloon.stats_enabled() { // Restore the stats descriptor. @@ -191,7 +204,7 @@ mod tests { let mut mem = vec![0; 4096]; // Create and save the balloon device. - let balloon = Balloon::new(0x42, false, 2, false).unwrap(); + let balloon = Balloon::new(0x42, false, 2, false, false).unwrap(); Snapshot::new(balloon.save()) .save(&mut mem.as_mut_slice()) @@ -210,7 +223,18 @@ mod tests { assert_eq!(restored_balloon.acked_features, balloon.acked_features); assert_eq!(restored_balloon.avail_features, balloon.avail_features); - assert_eq!(restored_balloon.config_space, balloon.config_space); + assert_eq!( + restored_balloon.config_space.num_pages, + balloon.config_space.num_pages + ); + assert_eq!( + restored_balloon.config_space.actual_pages, + balloon.config_space.actual_pages + ); + assert_eq!( + restored_balloon.config_space.free_page_hint_cmd_id, + FREE_PAGE_HINT_DONE + ); assert_eq!(restored_balloon.queues(), balloon.queues()); assert!(!restored_balloon.is_activated()); assert!(!balloon.is_activated()); diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index a704c2b6062..fac31008030 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -600,6 +600,7 @@ mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, }; insert_balloon_device(&mut vmm, 
&mut cmdline, &mut event_manager, balloon_config); diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 85a9bb672fb..46348201c94 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -1476,6 +1476,7 @@ mod tests { amount_mib: 100, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, }) .unwrap(); @@ -1515,6 +1516,7 @@ mod tests { amount_mib: 100, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, }; assert!(vm_resources.balloon.get().is_none()); @@ -1553,7 +1555,7 @@ mod tests { .unwrap(); let err = vm_resources .update_from_restored_device(SharedDeviceType::Balloon(Arc::new(Mutex::new( - Balloon::new(128, false, 0, false).unwrap(), + Balloon::new(128, false, 0, false, false).unwrap(), )))) .unwrap_err(); assert!( diff --git a/src/vmm/src/vmm_config/balloon.rs b/src/vmm/src/vmm_config/balloon.rs index 2a306caa9b2..64003bdf9fd 100644 --- a/src/vmm/src/vmm_config/balloon.rs +++ b/src/vmm/src/vmm_config/balloon.rs @@ -36,6 +36,9 @@ pub struct BalloonDeviceConfig { /// Interval in seconds between refreshing statistics. 
#[serde(default)] pub stats_polling_interval_s: u16, + /// Free page hinting enabled + #[serde(default)] + pub free_page_hinting: bool, /// Free page reporting enabled #[serde(default)] pub free_page_reporting: bool, @@ -47,6 +50,7 @@ impl From for BalloonDeviceConfig { amount_mib: state.amount_mib, deflate_on_oom: state.deflate_on_oom, stats_polling_interval_s: state.stats_polling_interval_s, + free_page_hinting: state.free_page_hinting, free_page_reporting: state.free_page_reporting, } } @@ -92,6 +96,7 @@ impl BalloonBuilder { cfg.amount_mib, cfg.deflate_on_oom, cfg.stats_polling_interval_s, + cfg.free_page_hinting, cfg.free_page_reporting, )?))); @@ -135,6 +140,7 @@ pub(crate) mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, } } @@ -146,6 +152,7 @@ pub(crate) mod tests { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, + free_page_hinting: false, free_page_reporting: false, }; assert_eq!(default_balloon_config, balloon_config); @@ -168,6 +175,7 @@ pub(crate) mod tests { amount_mib: 5, deflate_on_oom: false, stats_polling_interval_s: 3, + free_page_hinting: false, free_page_reporting: false, }; @@ -175,6 +183,7 @@ pub(crate) mod tests { amount_mib: 5, deflate_on_oom: false, stats_polling_interval_s: 3, + free_page_hinting: false, free_page_reporting: false, }); @@ -184,7 +193,7 @@ pub(crate) mod tests { #[test] fn test_set_device() { let mut builder = BalloonBuilder::new(); - let balloon = Balloon::new(0, true, 0, false).unwrap(); + let balloon = Balloon::new(0, true, 0, false, false).unwrap(); builder.set_device(Arc::new(Mutex::new(balloon))); assert!(builder.inner.is_some()); } diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index f2e6334b7a5..959c9fc5a83 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -1168,6 +1168,7 @@ def 
test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano): "deflate_on_oom": True, "stats_polling_interval_s": 0, "free_page_reporting": False, + "free_page_hinting": False, } # Add a vsock device. @@ -1295,6 +1296,7 @@ def test_get_full_config(uvm_plain): "deflate_on_oom": True, "stats_polling_interval_s": 0, "free_page_reporting": False, + "free_page_hinting": False, } # Add a vsock device. From 59e8fb668f83fc2a43baace213ef2ccab0450708 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Thu, 23 Oct 2025 11:11:58 +0000 Subject: [PATCH 3/8] virtio-balloon: Add api for free page hinting Add API endpoints to manage free page hinting. There are three endpoints: Start - to begin a new free page hinting run, Status - to track the state of the hinting run, Stop - to stop the hinting run and allow the guest to reclaim the reported pages. Signed-off-by: Jack Thomson --- .../src/api_server/parsed_request.rs | 72 ++++++- .../src/api_server/request/balloon.rs | 189 ++++++++++++++---- src/firecracker/swagger/firecracker.yaml | 82 +++++++- src/vmm/src/lib.rs | 26 +++ src/vmm/src/rpc_interface.rs | 35 +++- 5 files changed, 359 insertions(+), 45 deletions(-) diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 9f1ab870061..6f6474fb828 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -78,7 +78,7 @@ impl TryFrom<&Request> for ParsedRequest { match (request.method(), path, request.body.as_ref()) { (Method::Get, "", None) => parse_get_instance_info(), - (Method::Get, "balloon", None) => parse_get_balloon(path_tokens.next()), + (Method::Get, "balloon", None) => parse_get_balloon(path_tokens), (Method::Get, "version", None) => parse_get_version(), (Method::Get, "vm", None) if path_tokens.next() == Some("config") => { Ok(ParsedRequest::new_sync(VmmAction::GetFullVmConfig)) @@ -104,7 +104,7 @@ impl TryFrom<&Request> for
ParsedRequest { (Method::Put, "vsock", Some(body)) => parse_put_vsock(body), (Method::Put, "entropy", Some(body)) => parse_put_entropy(body), (Method::Put, _, None) => method_to_error(Method::Put), - (Method::Patch, "balloon", Some(body)) => parse_patch_balloon(body, path_tokens.next()), + (Method::Patch, "balloon", body) => parse_patch_balloon(body, path_tokens), (Method::Patch, "drives", Some(body)) => parse_patch_drive(body, path_tokens.next()), (Method::Patch, "machine-config", Some(body)) => parse_patch_machine_config(body), (Method::Patch, "mmds", Some(body)) => parse_patch_mmds(body), @@ -175,6 +175,9 @@ impl ParsedRequest { Self::success_response_with_data(balloon_config) } VmmData::BalloonStats(stats) => Self::success_response_with_data(stats), + VmmData::HintingStatus(hinting_status) => { + Self::success_response_with_data(hinting_status) + } VmmData::InstanceInformation(info) => Self::success_response_with_data(info), VmmData::VmmVersion(version) => Self::success_response_with_data( &serde_json::json!({ "firecracker_version": version.as_str() }), @@ -474,6 +477,17 @@ pub mod tests { &parsed_request, Err(RequestError::Generic(StatusCode::BadRequest, s)) if s == "Empty PATCH request.", )); + + sender + .write_all(http_request("PATCH", "/balloon", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + let parsed_request = ParsedRequest::try_from(&req); + assert!(matches!( + &parsed_request, + Err(RequestError::Generic(StatusCode::BadRequest, s)) if s == "Empty PATCH request.", + )); } #[test] @@ -559,6 +573,9 @@ pub mod tests { VmmData::BalloonStats(stats) => { http_response(&serde_json::to_string(stats).unwrap(), 200) } + VmmData::HintingStatus(status) => { + http_response(&serde_json::to_string(status).unwrap(), 200) + } VmmData::Empty => http_response("", 204), VmmData::FullVmConfig(cfg) => { http_response(&serde_json::to_string(cfg).unwrap(), 200) @@ -642,6 +659,18 @@ pub mod tests { 
ParsedRequest::try_from(&req).unwrap(); } + #[test] + fn test_try_from_get_balloon_hinting() { + let (mut sender, receiver) = UnixStream::pair().unwrap(); + let mut connection = HttpConnection::new(receiver); + sender + .write_all(http_request("GET", "/balloon/hinting/status", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + } + #[test] fn test_try_from_get_machine_config() { let (mut sender, receiver) = UnixStream::pair().unwrap(); @@ -910,6 +939,7 @@ pub mod tests { connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); + let body = "{ \"stats_polling_interval_s\": 1 }"; sender .write_all(http_request("PATCH", "/balloon/statistics", Some(body)).as_bytes()) @@ -917,6 +947,44 @@ pub mod tests { connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); + + let body = "{ \"acknowledge_on_stop\": true }"; + sender + .write_all(http_request("PATCH", "/balloon/hinting/start", Some(body)).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + + let body = "{}"; + sender + .write_all(http_request("PATCH", "/balloon/hinting/start", Some(body)).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + + sender + .write_all(http_request("PATCH", "/balloon/hinting/start", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + + let body = ""; + sender + .write_all(http_request("PATCH", "/balloon/hinting/stop", Some(body)).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = 
connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + + sender + .write_all(http_request("PATCH", "/balloon/hinting/stop", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); } #[test] diff --git a/src/firecracker/src/api_server/request/balloon.rs b/src/firecracker/src/api_server/request/balloon.rs index 13db502869e..b51aecf2d69 100644 --- a/src/firecracker/src/api_server/request/balloon.rs +++ b/src/firecracker/src/api_server/request/balloon.rs @@ -1,7 +1,7 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -use micro_http::StatusCode; +use micro_http::{Method, StatusCode}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::balloon::{ BalloonDeviceConfig, BalloonUpdateConfig, BalloonUpdateStatsConfig, @@ -9,18 +9,36 @@ use vmm::vmm_config::balloon::{ use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; +use crate::api_server::parsed_request::method_to_error; -pub(crate) fn parse_get_balloon( - path_second_token: Option<&str>, -) -> Result { - match path_second_token { - Some(stats_path) => match stats_path { - "statistics" => Ok(ParsedRequest::new_sync(VmmAction::GetBalloonStats)), - _ => Err(RequestError::Generic( - StatusCode::BadRequest, - format!("Unrecognized GET request path `{}`.", stats_path), - )), - }, +fn parse_get_hinting<'a, T>(mut path_tokens: T) -> Result +where + T: Iterator, +{ + match path_tokens.next() { + Some("status") => Ok(ParsedRequest::new_sync(VmmAction::GetFreePageHintingStatus)), + Some(stats_path) => Err(RequestError::Generic( + StatusCode::BadRequest, + format!("Unrecognized GET request path `/hinting/{stats_path}`."), + )), + None => Err(RequestError::Generic( + StatusCode::BadRequest, + "Unrecognized GET request path `/hinting/`.".to_string(), + )), + } +} + +pub(crate) fn parse_get_balloon<'a, 
T>(mut path_tokens: T) -> Result +where + T: Iterator, +{ + match path_tokens.next() { + Some("statistics") => Ok(ParsedRequest::new_sync(VmmAction::GetBalloonStats)), + Some("hinting") => parse_get_hinting(path_tokens), + Some(stats_path) => Err(RequestError::Generic( + StatusCode::BadRequest, + format!("Unrecognized GET request path `{}`.", stats_path), + )), None => Ok(ParsedRequest::new_sync(VmmAction::GetBalloonConfig)), } } @@ -31,23 +49,55 @@ pub(crate) fn parse_put_balloon(body: &Body) -> Result, -) -> Result { - match path_second_token { - Some(config_path) => match config_path { - "statistics" => Ok(ParsedRequest::new_sync(VmmAction::UpdateBalloonStatistics( +fn parse_patch_hinting<'a, T>( + body: Option<&Body>, + mut path_tokens: T, +) -> Result +where + T: Iterator, +{ + match path_tokens.next() { + Some("start") => { + let cmd = match body { + None => Default::default(), + Some(b) if b.is_empty() => Default::default(), + Some(b) => serde_json::from_slice(b.raw())?, + }; + + Ok(ParsedRequest::new_sync(VmmAction::StartFreePageHinting( + cmd, + ))) + } + Some("stop") => Ok(ParsedRequest::new_sync(VmmAction::StopFreePageHinting)), + Some(stats_path) => Err(RequestError::Generic( + StatusCode::BadRequest, + format!("Unrecognized PATCH request path `/hinting/{stats_path}`."), + )), + None => Err(RequestError::Generic( + StatusCode::BadRequest, + "Unrecognized PATCH request path `/hinting/`.".to_string(), + )), + } +} + +pub(crate) fn parse_patch_balloon<'a, T>( + body: Option<&Body>, + mut path_tokens: T, +) -> Result +where + T: Iterator, +{ + match (path_tokens.next(), body) { + (Some("statistics"), Some(body)) => { + Ok(ParsedRequest::new_sync(VmmAction::UpdateBalloonStatistics( serde_json::from_slice::(body.raw())?, - ))), - _ => Err(RequestError::Generic( - StatusCode::BadRequest, - format!("Unrecognized PATCH request path `{}`.", config_path), - )), - }, - None => Ok(ParsedRequest::new_sync(VmmAction::UpdateBalloon( + ))) + } + (Some("hinting"), body) 
=> parse_patch_hinting(body, path_tokens), + (_, Some(body)) => Ok(ParsedRequest::new_sync(VmmAction::UpdateBalloon( serde_json::from_slice::(body.raw())?, ))), + (_, None) => method_to_error(Method::Patch), } } @@ -58,30 +108,33 @@ mod tests { #[test] fn test_parse_get_balloon_request() { - parse_get_balloon(None).unwrap(); + parse_get_balloon([].into_iter()).unwrap(); - parse_get_balloon(Some("unrelated")).unwrap_err(); + parse_get_balloon(["unrelated"].into_iter()).unwrap_err(); - parse_get_balloon(Some("statistics")).unwrap(); + parse_get_balloon(["statistics"].into_iter()).unwrap(); + + parse_get_balloon(["hinting", "status"].into_iter()).unwrap(); + parse_get_balloon(["hinting"].into_iter()).unwrap_err(); } #[test] fn test_parse_patch_balloon_request() { - parse_patch_balloon(&Body::new("invalid_payload"), None).unwrap_err(); + parse_patch_balloon(Some(&Body::new("invalid_payload")), [].into_iter()).unwrap_err(); // PATCH with invalid fields. let body = r#"{ "amount_mib": "bar", "foo": "bar" }"#; - parse_patch_balloon(&Body::new(body), None).unwrap_err(); + parse_patch_balloon(Some(&Body::new(body)), [].into_iter()).unwrap_err(); // PATCH with invalid types on fields. Adding a polling interval as string instead of bool. let body = r#"{ "amount_mib": 1000, "stats_polling_interval_s": "false" }"#; - let res = parse_patch_balloon(&Body::new(body), None); + let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter()); res.unwrap_err(); // PATCH with invalid types on fields. Adding a amount_mib as a negative number. @@ -89,21 +142,21 @@ mod tests { "amount_mib": -1000, "stats_polling_interval_s": true }"#; - let res = parse_patch_balloon(&Body::new(body), None); + let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter()); res.unwrap_err(); // PATCH on statistics with missing ppolling interval field. 
let body = r#"{ "amount_mib": 100 }"#; - let res = parse_patch_balloon(&Body::new(body), Some("statistics")); + let res = parse_patch_balloon(Some(&Body::new(body)), ["statistics"].into_iter()); res.unwrap_err(); // PATCH with missing amount_mib field. let body = r#"{ "stats_polling_interval_s": 0 }"#; - let res = parse_patch_balloon(&Body::new(body), None); + let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter()); res.unwrap_err(); // PATCH that tries to update something else other than allowed fields. @@ -111,27 +164,29 @@ mod tests { "amount_mib": "dummy_id", "stats_polling_interval_s": "dummy_host" }"#; - let res = parse_patch_balloon(&Body::new(body), None); + let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter()); res.unwrap_err(); // PATCH with payload that is not a json. let body = r#"{ "fields": "dummy_field" }"#; - parse_patch_balloon(&Body::new(body), None).unwrap_err(); + parse_patch_balloon(Some(&Body::new(body)), [].into_iter()).unwrap_err(); // PATCH on unrecognized path. 
let body = r#"{ "fields": "dummy_field" }"#; - parse_patch_balloon(&Body::new(body), Some("config")).unwrap_err(); + parse_patch_balloon(Some(&Body::new(body)), ["config"].into_iter()).unwrap_err(); let body = r#"{ "amount_mib": 1 }"#; let expected_config = BalloonUpdateConfig { amount_mib: 1 }; assert_eq!( - vmm_action_from_request(parse_patch_balloon(&Body::new(body), None).unwrap()), + vmm_action_from_request( + parse_patch_balloon(Some(&Body::new(body)), [].into_iter()).unwrap() + ), VmmAction::UpdateBalloon(expected_config) ); @@ -143,10 +198,44 @@ mod tests { }; assert_eq!( vmm_action_from_request( - parse_patch_balloon(&Body::new(body), Some("statistics")).unwrap() + parse_patch_balloon(Some(&Body::new(body)), ["statistics"].into_iter()).unwrap() ), VmmAction::UpdateBalloonStatistics(expected_config) ); + + // PATCH start hinting run valid data + let body = r#"{ + "acknowledge_on_stop": true + }"#; + parse_patch_balloon(Some(&Body::new(body)), ["hinting", "start"].into_iter()).unwrap(); + + // PATCH start hinting run empty body + parse_patch_balloon(Some(&Body::new("")), ["hinting", "start"].into_iter()).unwrap(); + + // PATCH start hinting run invalid data + let body = r#"{ + "acknowledge_on_stop": "not valid" + }"#; + parse_patch_balloon(Some(&Body::new(body)), ["hinting", "start"].into_iter()).unwrap_err(); + + // PATCH start hinting run invalid data (repeated) + parse_patch_balloon(Some(&Body::new(body)), ["hinting", "start"].into_iter()).unwrap_err(); + + // PATCH stop hinting run empty body + parse_patch_balloon(Some(&Body::new("")), ["hinting", "stop"].into_iter()).unwrap(); + + // PATCH stop hinting run no body + parse_patch_balloon(None, ["hinting", "stop"].into_iter()).unwrap(); + + // PATCH hinting incomplete path + parse_patch_balloon(Some(&Body::new("")), ["hinting"].into_iter()).unwrap_err(); + + // PATCH hinting invalid path + parse_patch_balloon(Some(&Body::new("")), ["hinting", "other path"].into_iter()) + .unwrap_err(); + + // PATCH no body incomplete hinting path + 
parse_patch_balloon(None, ["hinting"].into_iter()).unwrap_err(); } #[test] @@ -160,12 +249,30 @@ mod tests { }"#; parse_put_balloon(&Body::new(body)).unwrap_err(); - // PUT with valid input fields. + // PUT with valid input fields. Hinting reporting missing let body = r#"{ "amount_mib": 1000, "deflate_on_oom": true, "stats_polling_interval_s": 0 }"#; parse_put_balloon(&Body::new(body)).unwrap(); + + // PUT with valid input hinting + let body = r#"{ + "amount_mib": 1000, + "deflate_on_oom": true, + "stats_polling_interval_s": 0, + "free_page_hinting": true + }"#; + parse_put_balloon(&Body::new(body)).unwrap(); + + // PUT with valid reporting + let body = r#"{ + "amount_mib": 1000, + "deflate_on_oom": true, + "stats_polling_interval_s": 0, + "free_page_reporting": true + }"#; + parse_put_balloon(&Body::new(body)).unwrap(); } } diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 5bf55108b09..b0ad8bf8754 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -169,6 +169,63 @@ paths: schema: $ref: "#/definitions/Error" + /balloon/hinting/start: + patch: + summary: Starts a free page hinting run only if enabled pre-boot. + operationId: startBalloonHinting + parameters: + - name: body + in: body + description: Whether the end of the hinting run should be acknowledged automatically once the device completes it. + required: false + schema: + $ref: "#/definitions/BalloonStartCmd" + responses: + 200: + description: Free page hinting run started. + 400: + description: The balloon free hinting was not enabled when the device was configured. + schema: + $ref: "#/definitions/Error" + default: + description: Internal Server Error + schema: + $ref: "#/definitions/Error" + + /balloon/hinting/status: + get: + summary: Returns the balloon hinting statistics, only if enabled pre-boot. 
+ operationId: describeBalloonHinting + responses: + 200: + description: The balloon free page hinting statistics + schema: + $ref: "#/definitions/BalloonHintingStatus" + 400: + description: The balloon free hinting was not enabled when the device was configured. + schema: + $ref: "#/definitions/Error" + default: + description: Internal Server Error + schema: + $ref: "#/definitions/Error" + + /balloon/hinting/stop: + patch: + summary: Stops a free page hinting run only if enabled pre-boot. + operationId: stopBalloonHinting + responses: + 200: + description: Free page hinting run stopped. + 400: + description: The balloon free hinting was not enabled when the device was configured. + schema: + $ref: "#/definitions/Error" + default: + description: Internal Server Error + schema: + $ref: "#/definitions/Error" + /boot-source: put: summary: Creates or updates the boot source. Pre-boot only. @@ -182,7 +239,7 @@ paths: description: Guest boot source properties required: true schema: - $ref: "#/definitions/BootSource" + $ref: "#/definitions/BalloonStartCmd" responses: 204: description: Boot source created/updated @@ -847,6 +904,29 @@ definitions: type: integer format: int64 + BalloonStartCmd: + type: object + description: + Command used to start a free page hinting run. + properties: + acknowledge_on_stop: + description: If Firecracker should automatically acknowledge when the guest submits a done cmd. + type: boolean + + BalloonHintingStatus: + type: object + description: + Describes the free page hinting status. + required: + - host_cmd + properties: + host_cmd: + description: The last command issued by the host. + type: integer + guest_cmd: + description: The last command provided by the guest. 
+ type: integer + BalloonStatsUpdate: type: object required: diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index ace273bb94c..70446b58a5b 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -136,6 +136,7 @@ use vstate::kvm::Kvm; use vstate::vcpu::{self, StartThreadedError, VcpuSendEventError}; use crate::cpu_config::templates::CpuConfiguration; +use crate::devices::virtio::balloon::device::{HintingStatus, StartHintingCmd}; use crate::devices::virtio::balloon::{BALLOON_DEV_ID, Balloon, BalloonConfig, BalloonStats}; use crate::devices::virtio::block::device::Block; use crate::devices::virtio::net::Net; @@ -597,6 +598,31 @@ impl Vmm { .map_err(VmmError::FindDeviceError) } + /// Starts the balloon free page hinting run + pub fn start_balloon_hinting(&mut self, cmd: StartHintingCmd) -> Result<(), VmmError> { + self.device_manager + .try_with_virtio_device_with_id(BALLOON_DEV_ID, |dev: &mut Balloon| { + dev.start_hinting(cmd) + }) + .map_err(VmmError::FindDeviceError) + } + + /// Retrieves the status of the balloon hinting run + pub fn get_balloon_hinting_status(&mut self) -> Result { + self.device_manager + .try_with_virtio_device_with_id(BALLOON_DEV_ID, |dev: &mut Balloon| { + dev.get_hinting_status() + }) + .map_err(VmmError::FindDeviceError) + } + + /// Stops the balloon free page hinting run + pub fn stop_balloon_hinting(&mut self) -> Result<(), VmmError> { + self.device_manager + .try_with_virtio_device_with_id(BALLOON_DEV_ID, |dev: &mut Balloon| dev.stop_hinting()) + .map_err(VmmError::FindDeviceError) + } + /// Signals Vmm to stop and exit. 
pub fn stop(&mut self, exit_code: FcExitCode) { // To avoid cycles, all teardown paths take the following route: diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 6bae98f3546..740d8ccdd7f 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -14,6 +14,7 @@ use super::{Vmm, VmmError}; use crate::EventManager; use crate::builder::StartMicrovmError; use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; +use crate::devices::virtio::balloon::device::{HintingStatus, StartHintingCmd}; use crate::logger::{LoggerConfig, info, warn, *}; use crate::mmds::data_store::{self, Mmds}; use crate::persist::{CreateSnapshotError, RestoreFromSnapshotError, VmInfo}; @@ -119,6 +120,12 @@ pub enum VmmAction { UpdateBalloon(BalloonUpdateConfig), /// Update the balloon statistics polling interval, after microVM start. UpdateBalloonStatistics(BalloonUpdateStatsConfig), + /// Start a free page hinting run + StartFreePageHinting(StartHintingCmd), + /// Retrieve the status of the hinting run + GetFreePageHintingStatus, + /// Stops a free page hinting run + StopFreePageHinting, /// Update existing block device properties such as `path_on_host` or `rate_limiter`. UpdateBlockDevice(BlockDeviceUpdateConfig), /// Update a network interface, after microVM start. Currently, the only updatable properties @@ -201,6 +208,8 @@ pub enum VmmData { InstanceInformation(InstanceInfo), /// The microVM version. VmmVersion(String), + /// The status of the virtio-balloon hinting run + HintingStatus(HintingStatus), } /// Trait used for deduplicating the MMDS request handling across the two ApiControllers. 
@@ -462,7 +471,10 @@ impl<'a> PrebootApiController<'a> { | UpdateBalloon(_) | UpdateBalloonStatistics(_) | UpdateBlockDevice(_) - | UpdateNetworkInterface(_) => Err(VmmActionError::OperationNotSupportedPreBoot), + | UpdateNetworkInterface(_) + | StartFreePageHinting(_) + | GetFreePageHintingStatus + | StopFreePageHinting => Err(VmmActionError::OperationNotSupportedPreBoot), #[cfg(target_arch = "x86_64")] SendCtrlAltDel => Err(VmmActionError::OperationNotSupportedPreBoot), } @@ -692,6 +704,27 @@ impl RuntimeApiController { .update_balloon_stats_config(balloon_stats_update.stats_polling_interval_s) .map(|_| VmmData::Empty) .map_err(VmmActionError::BalloonUpdate), + StartFreePageHinting(cmd) => self + .vmm + .lock() + .expect("Poisoned lock") + .start_balloon_hinting(cmd) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::BalloonUpdate), + GetFreePageHintingStatus => self + .vmm + .lock() + .expect("Poisoned lock") + .get_balloon_hinting_status() + .map(VmmData::HintingStatus) + .map_err(VmmActionError::BalloonUpdate), + StopFreePageHinting => self + .vmm + .lock() + .expect("Poisoned lock") + .stop_balloon_hinting() + .map(|_| VmmData::Empty) + .map_err(VmmActionError::BalloonUpdate), UpdateBlockDevice(new_cfg) => self.update_block_device(new_cfg), UpdateNetworkInterface(netif_update) => self.update_net_rate_limiters(netif_update), From 90621d3c4d24d59fa2b35e9e6d38aa144659352c Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Thu, 23 Oct 2025 15:20:08 +0000 Subject: [PATCH 4/8] virtio-balloon: Add metrics for hinting/reporting Add metrics to track free page hinting and reporting. For both devices track the number of ranges reported, the number of errors encountered while freeing and the total amount of memory freed. 
Signed-off-by: Jack Thomson --- src/vmm/src/devices/virtio/balloon/device.rs | 8 ++++++++ src/vmm/src/devices/virtio/balloon/metrics.rs | 18 ++++++++++++++++++ tests/host_tools/fcmetrics.py | 6 ++++++ 3 files changed, 32 insertions(+) diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index ca72791ae9a..0d5ea098312 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -549,8 +549,12 @@ impl Balloon { continue; } + METRICS.free_page_hint_count.inc(); if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) { + METRICS.free_page_hint_fails.inc(); error!("balloon hinting: failed to remove range: {err:?}"); + } else { + METRICS.free_page_hint_freed.add(desc.len as u64); } } @@ -587,8 +591,12 @@ impl Balloon { let mut last_desc = Some(head); while let Some(desc) = last_desc { + METRICS.free_page_report_count.inc(); if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) { + METRICS.free_page_report_fails.inc(); error!("balloon: failed to remove range: {err:?}"); + } else { + METRICS.free_page_report_freed.add(desc.len as u64); } last_desc = desc.next_descriptor(); } diff --git a/src/vmm/src/devices/virtio/balloon/metrics.rs b/src/vmm/src/devices/virtio/balloon/metrics.rs index 0b438cae2d4..66e33ac1799 100644 --- a/src/vmm/src/devices/virtio/balloon/metrics.rs +++ b/src/vmm/src/devices/virtio/balloon/metrics.rs @@ -63,6 +63,18 @@ pub(super) struct BalloonDeviceMetrics { pub deflate_count: SharedIncMetric, /// Number of times when handling events on a balloon device failed. 
pub event_fails: SharedIncMetric, + /// Number of times when free page reporting was triggered + pub free_page_report_count: SharedIncMetric, + /// Total memory freed by the reporting driver + pub free_page_report_freed: SharedIncMetric, + /// Number of errors that occurred while reporting + pub free_page_report_fails: SharedIncMetric, + /// Number of times when free page hinting was triggered + pub free_page_hint_count: SharedIncMetric, + /// Total memory freed by the hinting driver + pub free_page_hint_freed: SharedIncMetric, + /// Number of errors that occurred while hinting + pub free_page_hint_fails: SharedIncMetric, } impl BalloonDeviceMetrics { /// Const default construction. @@ -74,6 +86,12 @@ impl BalloonDeviceMetrics { stats_update_fails: SharedIncMetric::new(), deflate_count: SharedIncMetric::new(), event_fails: SharedIncMetric::new(), + free_page_report_count: SharedIncMetric::new(), + free_page_report_freed: SharedIncMetric::new(), + free_page_report_fails: SharedIncMetric::new(), + free_page_hint_count: SharedIncMetric::new(), + free_page_hint_freed: SharedIncMetric::new(), + free_page_hint_fails: SharedIncMetric::new(), } } } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 5b1343ffab7..df15ebbbdc1 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -140,6 +140,12 @@ def validate_fc_metrics(metrics): "stats_update_fails", "deflate_count", "event_fails", + "free_page_report_count", + "free_page_report_freed", + "free_page_report_fails", + "free_page_hint_count", + "free_page_hint_freed", + "free_page_hint_fails", ], "block": block_metrics, "deprecated_api": [ From 36eafae3950a525b8105369ee50078d0f2f4ad2d Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Thu, 23 Oct 2025 11:18:44 +0000 Subject: [PATCH 5/8] test: Add API hinting endpoints Adding new resources to the http api to enable testing of the hinting functionality. 
Signed-off-by: Jack Thomson --- src/firecracker/src/api_server/parsed_request.rs | 4 ++++ src/firecracker/src/api_server/request/balloon.rs | 1 + src/vmm/src/rpc_interface.rs | 5 +++++ tests/framework/http_api.py | 3 +++ 4 files changed, 13 insertions(+) diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 6f6474fb828..b08b0e4ef05 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -328,6 +328,7 @@ pub mod tests { use micro_http::HttpConnection; use vmm::builder::StartMicrovmError; use vmm::cpu_config::templates::test_utils::build_test_template; + use vmm::devices::virtio::balloon::device::HintingStatus; use vmm::resources::VmmConfig; use vmm::rpc_interface::VmmActionError; use vmm::vmm_config::balloon::{BalloonDeviceConfig, BalloonStats}; @@ -605,6 +606,9 @@ pub mod tests { swap_out: Some(1), ..Default::default() })); + verify_ok_response_with(VmmData::HintingStatus(HintingStatus { + ..Default::default() + })); verify_ok_response_with(VmmData::Empty); verify_ok_response_with(VmmData::FullVmConfig(VmmConfig::default())); verify_ok_response_with(VmmData::MachineConfiguration(MachineConfig::default())); diff --git a/src/firecracker/src/api_server/request/balloon.rs b/src/firecracker/src/api_server/request/balloon.rs index b51aecf2d69..4093a0e7547 100644 --- a/src/firecracker/src/api_server/request/balloon.rs +++ b/src/firecracker/src/api_server/request/balloon.rs @@ -115,6 +115,7 @@ mod tests { parse_get_balloon(["statistics"].into_iter()).unwrap(); parse_get_balloon(["hinting", "status"].into_iter()).unwrap(); + parse_get_balloon(["hinting", "unrelated"].into_iter()).unwrap_err(); parse_get_balloon(["hinting"].into_iter()).unwrap_err(); } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 740d8ccdd7f..d10414494f2 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -1175,6 
+1175,11 @@ mod tests { check_unsupported(preboot_request(VmmAction::UpdateBalloon( BalloonUpdateConfig { amount_mib: 0 }, ))); + check_unsupported(preboot_request(VmmAction::StartFreePageHinting( + Default::default(), + ))); + check_unsupported(preboot_request(VmmAction::GetFreePageHintingStatus)); + check_unsupported(preboot_request(VmmAction::StopFreePageHinting)); check_unsupported(preboot_request(VmmAction::UpdateBalloonStatistics( BalloonUpdateStatsConfig { stats_polling_interval_s: 0, diff --git a/tests/framework/http_api.py b/tests/framework/http_api.py index 0ae2e279571..68e75809568 100644 --- a/tests/framework/http_api.py +++ b/tests/framework/http_api.py @@ -127,6 +127,9 @@ def __init__(self, api_usocket_full_name, *, on_error=None): self.mmds_config = Resource(self, "/mmds/config") self.balloon = Resource(self, "/balloon") self.balloon_stats = Resource(self, "/balloon/statistics") + self.balloon_hinting_start = Resource(self, "/balloon/hinting/start") + self.balloon_hinting_status = Resource(self, "/balloon/hinting/status") + self.balloon_hinting_stop = Resource(self, "/balloon/hinting/stop") self.vsock = Resource(self, "/vsock") self.snapshot_create = Resource(self, "/snapshot/create") self.snapshot_load = Resource(self, "/snapshot/load") From fd889290ef71aa54e92e5a603f9ab09fbb59e098 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Thu, 23 Oct 2025 14:41:49 +0000 Subject: [PATCH 6/8] test: Add integration tests for hinting/reporting Add integration tests for free page hinting and reporting, both functional and performance tests. Update fast_page_helper so it can run in a oneshot mode, not requiring the signal to track the performance. New functional tests to ensure that hinting and reporting are reducing the RSS as expected in the guest. Updated reduce RSS test to touch memory to reduce the chance of flakiness. New performance tests for the balloon device. First being a test to track the CPU overhead of hinting and reporting. 
Second being a test to measure the faulting latency while reporting is running in the guest. Signed-off-by: Jack Thomson --- .../usr/local/bin/fast_page_fault_helper.c | 44 +++-- .../functional/test_balloon.py | 160 ++++++++++++++++-- .../performance/test_balloon.py | 149 ++++++++++++++++ 3 files changed, 327 insertions(+), 26 deletions(-) create mode 100644 tests/integration_tests/performance/test_balloon.py diff --git a/resources/overlay/usr/local/bin/fast_page_fault_helper.c b/resources/overlay/usr/local/bin/fast_page_fault_helper.c index 7558f7b09fc..a4b2cfed929 100644 --- a/resources/overlay/usr/local/bin/fast_page_fault_helper.c +++ b/resources/overlay/usr/local/bin/fast_page_fault_helper.c @@ -16,6 +16,7 @@ #include // mmap #include // clock_gettime #include // open +#include // getopt #define MEM_SIZE_MIB (128 * 1024 * 1024) #define NANOS_PER_SEC 1000000000 @@ -30,20 +31,39 @@ void touch_memory(void *mem, size_t size, char val) { int main() { sigset_t set; - int signal; + int signal, character; void *ptr; struct timespec start, end; long duration_nanos; FILE *out_file; - sigemptyset(&set); - if (sigaddset(&set, SIGUSR1) == -1) { - perror("sigaddset"); - return 1; + char *options = 0; + int longindex = 0; + int signal_wait = 1; + + struct option longopts[] = { + {"nosignal", no_argument, NULL, 's'}, + {NULL, 0, NULL, 0} + }; + + while((character = getopt_long(argc, argv, "s", longopts, &longindex)) != -1) { + switch (character) { + case 's': + signal_wait = 0; + break; + } } - if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) { - perror("sigprocmask"); - return 1; + + if (signal_wait) { + sigemptyset(&set); + if (sigaddset(&set, SIGUSR1) == -1) { + perror("sigaddset"); + return 1; + } + if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) { + perror("sigprocmask"); + return 1; + } } ptr = mmap(NULL, MEM_SIZE_MIB, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); @@ -53,9 +73,11 @@ int main() { return 1; } - touch_memory(ptr, MEM_SIZE_MIB, 1); + if 
(signal_wait) { + touch_memory(ptr, MEM_SIZE_MIB, 1); - sigwait(&set, &signal); + sigwait(&set, &signal); + } clock_gettime(CLOCK_BOOTTIME, &start); touch_memory(ptr, MEM_SIZE_MIB, 2); @@ -76,4 +98,4 @@ int main() { } return 0; -} \ No newline at end of file +} diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py index f8960bedb6d..7dbf86e3c0a 100644 --- a/tests/integration_tests/functional/test_balloon.py +++ b/tests/integration_tests/functional/test_balloon.py @@ -3,6 +3,7 @@ """Tests for guest-side operations on /balloon resources.""" import logging +import signal import time from subprocess import TimeoutExpired @@ -293,7 +294,8 @@ def test_reinflate_balloon(uvm_plain_any): # pylint: disable=C0103 -def test_size_reduction(uvm_plain_any): +@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) +def test_size_reduction(uvm_plain_any, method): """ Verify that ballooning reduces RSS usage on a newly booted guest. """ @@ -302,21 +304,42 @@ def test_size_reduction(uvm_plain_any): test_microvm.basic_config() test_microvm.add_net_iface() + traditional_balloon = method == "traditional" + free_page_reporting = method == "reporting" + free_page_hinting = method == "hinting" + # Add a memory balloon. test_microvm.api.balloon.put( - amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0 + amount_mib=0, + deflate_on_oom=True, + stats_polling_interval_s=0, + free_page_reporting=free_page_reporting, + free_page_hinting=free_page_hinting, ) # Start the microvm. test_microvm.start() firecracker_pid = test_microvm.firecracker_pid - # Check memory usage. 
+ get_stable_rss_mem_by_pid(firecracker_pid) + + test_microvm.ssh.check_output( + "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 /proc/sys/vm/drop_caches") - time.sleep(5) + time.sleep(2) # We take the initial reading of the RSS, then calculate the amount # we need to inflate the balloon with by subtracting it from the @@ -324,8 +347,17 @@ def test_size_reduction(uvm_plain_any): # get a lower reading than the initial one. inflate_size = 256 - int(first_reading / 1024) + 10 - # Now inflate the balloon. - test_microvm.api.balloon.patch(amount_mib=inflate_size) + if traditional_balloon: + # Now inflate the balloon + test_microvm.api.balloon.patch(amount_mib=inflate_size) + elif free_page_hinting: + test_microvm.api.balloon_hinting_start.patch() + + _ = get_stable_rss_mem_by_pid(firecracker_pid) + + if traditional_balloon: + # Deflate the balloon completely. + test_microvm.api.balloon.patch(amount_mib=0) # Check memory usage again. second_reading = get_stable_rss_mem_by_pid(firecracker_pid) @@ -534,7 +566,92 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory): assert stats_after_snap["available_memory"] > latest_stats["available_memory"] -def test_memory_scrub(uvm_plain_any): +@pytest.mark.parametrize("method", ["reporting", "hinting"]) +def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): + """ + Test that the balloon hinting and reporting works after pause/resume. + """ + vm = uvm_plain_any + vm.spawn() + vm.basic_config( + vcpu_count=2, + mem_size_mib=256, + ) + vm.add_net_iface() + + free_page_reporting = method == "reporting" + free_page_hinting = method == "hinting" + + # Add a memory balloon with stats enabled. 
+ vm.api.balloon.put( + amount_mib=0, + deflate_on_oom=True, + stats_polling_interval_s=STATS_POLLING_INTERVAL_S, + free_page_reporting=free_page_reporting, + free_page_hinting=free_page_hinting, + ) + + vm.start() + + vm.ssh.check_output( + "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 second_reading + + snapshot = vm.snapshot_full() + microvm = microvm_factory.build_from_snapshot(snapshot) + + firecracker_pid = microvm.firecracker_pid + + microvm.ssh.check_output( + "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 fourth_reading + + +@pytest.mark.parametrize("method", ["none", "hinting", "reporting"]) +def test_memory_scrub(uvm_plain_any, method): """ Test that the memory is zeroed after deflate. """ @@ -543,9 +660,16 @@ def test_memory_scrub(uvm_plain_any): microvm.basic_config(vcpu_count=2, mem_size_mib=256) microvm.add_net_iface() + free_page_reporting = method == "reporting" + free_page_hinting = method == "hinting" + # Add a memory balloon with stats enabled. microvm.api.balloon.put( - amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1 + amount_mib=0, + deflate_on_oom=True, + stats_polling_interval_s=1, + free_page_reporting=free_page_reporting, + free_page_hinting=free_page_hinting, ) microvm.start() @@ -553,8 +677,14 @@ def test_memory_scrub(uvm_plain_any): # Dirty 60MB of pages. make_guest_dirty_memory(microvm.ssh, amount_mib=60) - # Now inflate the balloon with 60MB of pages. - microvm.api.balloon.patch(amount_mib=60) + if method == "none": + # Now inflate the balloon with 60MB of pages. + microvm.api.balloon.patch(amount_mib=60) + elif method == "hinting": + time.sleep(1) + microvm.api.balloon_hinting_start.patch() + elif method == "reporting": + time.sleep(2) # Get the firecracker pid, and open an ssh connection. firecracker_pid = microvm.firecracker_pid @@ -562,10 +692,10 @@ def test_memory_scrub(uvm_plain_any): # Wait for the inflate to complete. 
_ = get_stable_rss_mem_by_pid(firecracker_pid) - # Deflate the balloon completely. - microvm.api.balloon.patch(amount_mib=0) - - # Wait for the deflate to complete. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + if method == "none": + # Deflate the balloon completely. + microvm.api.balloon.patch(amount_mib=0) + # Wait for the deflate to complete. + _ = get_stable_rss_mem_by_pid(firecracker_pid) microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1)) diff --git a/tests/integration_tests/performance/test_balloon.py b/tests/integration_tests/performance/test_balloon.py new file mode 100644 index 00000000000..bca331671fb --- /dev/null +++ b/tests/integration_tests/performance/test_balloon.py @@ -0,0 +1,149 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Tests for guest-side operations on /balloon resources.""" + +import concurrent +import signal +import time + +import pytest + +from framework.utils import track_cpu_utilization + +NS_IN_MSEC = 1_000_000 + + +def trigger_page_fault_run(vm): + """ + Clears old data and starts the fast_page_fault_helper script + """ + vm.ssh.check_output( + "rm -f /tmp/fast_page_fault_helper.out && /usr/local/bin/fast_page_fault_helper -s" + ) + + +def get_page_fault_duration(vm): + """ + Waits for the performance data to be available and will read the duration + """ + _, duration, _ = vm.ssh.check_output( + "while [ ! 
-f /tmp/fast_page_fault_helper.out ]; do sleep 1; done; cat /tmp/fast_page_fault_helper.out" + ) + return duration + + +@pytest.mark.parametrize("method", ["reporting", "hinting"]) +@pytest.mark.nonci +def test_hinting_reporting_cpu( + microvm_factory, guest_kernel_linux_6_1, rootfs, method, metrics +): + """ + Measure the CPU usage when running free page reporting and hinting + """ + test_microvm = microvm_factory.build( + guest_kernel_linux_6_1, rootfs, pci=True, monitor_memory=False + ) + test_microvm.spawn(emit_metrics=False) + test_microvm.basic_config(vcpu_count=2, mem_size_mib=1024) + test_microvm.add_net_iface() + + free_page_reporting = method == "reporting" + free_page_hinting = method == "hinting" + # Add a deflated memory balloon. + test_microvm.api.balloon.put( + amount_mib=0, + deflate_on_oom=False, + stats_polling_interval_s=0, + free_page_reporting=free_page_reporting, + free_page_hinting=free_page_hinting, + ) + test_microvm.start() + test_microvm.pin_threads(0) + + metrics.set_dimensions( + { + "performance_test": "test_balloon_cpu", + # "huge_pages": str(huge_pages), + "method": method, + **test_microvm.dimensions, + } + ) + + test_microvm.ssh.check_output( + "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 0 and (i + 1 < runs): + time.sleep(sleep_duration) From 41ed063e88ca9bd800fb41382464c458be2214d8 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Thu, 23 Oct 2025 15:14:21 +0000 Subject: [PATCH 7/8] test: Add unit tests for hint/reporting Add unit tests for free page hinting and reporting. Asserting the features are enabled correctly. Testing that the config space updates triggered by hinting are set as expected. 
Signed-off-by: Jack Thomson --- src/vmm/src/devices/virtio/balloon/device.rs | 386 ++++++++++++++++-- .../src/devices/virtio/balloon/test_utils.rs | 11 + src/vmm/src/devices/virtio/rng/device.rs | 2 +- src/vmm/src/devices/virtio/test_utils.rs | 52 ++- src/vmm/src/devices/virtio/transport/mmio.rs | 10 +- src/vmm/src/devices/virtio/transport/mod.rs | 4 + .../devices/virtio/transport/pci/device.rs | 5 + 7 files changed, 440 insertions(+), 30 deletions(-) diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 0d5ea098312..3ef2b06e177 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -85,13 +85,21 @@ fn default_ack_on_stop() -> bool { } /// Command recieved from the API to start a hinting run -#[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Deserialize)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize)] pub struct StartHintingCmd { /// If we should automatically acknowledge end of the run after stop. 
#[serde(default = "default_ack_on_stop")] pub acknowledge_on_stop: bool, } +impl Default for StartHintingCmd { + fn default() -> Self { + Self { + acknowledge_on_stop: true, + } + } +} + /// Returned to the API for get hinting status #[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Serialize)] pub struct HintingStatus { @@ -948,18 +956,49 @@ impl VirtioDevice for Balloon { #[cfg(test)] pub(crate) mod tests { + use itertools::iproduct; + use super::super::BALLOON_CONFIG_SPACE_SIZE; use super::*; + use crate::arch::host_page_size; use crate::check_metric_after_block; use crate::devices::virtio::balloon::report_balloon_event_fail; use crate::devices::virtio::balloon::test_utils::{ check_request_completion, invoke_handler_for_queue_event, set_request, }; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::devices::virtio::test_utils::test::{ + VirtioTestDevice, VirtioTestHelper, create_virtio_mem, + }; use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem}; use crate::test_utils::single_region_mem; + use crate::utils::align_up; use crate::vstate::memory::GuestAddress; + impl VirtioTestDevice for Balloon { + fn set_queues(&mut self, queues: Vec) { + self.queues = queues; + } + + fn num_queues(&self) -> usize { + let mut idx = STATS_INDEX; + + if self.stats_polling_interval_s > 0 { + idx += 1; + } + + if self.free_page_hinting() { + idx += 1; + } + + if self.free_page_reporting() { + idx += 1; + } + + idx + } + } + impl Balloon { pub(crate) fn set_queue(&mut self, idx: usize, q: Queue) { self.queues[idx] = q; @@ -1042,31 +1081,38 @@ pub(crate) mod tests { #[test] fn test_virtio_features() { // Test all feature combinations. 
- for deflate_on_oom in [true, false].iter() { - for stats_interval in [0, 1].iter() { - let mut balloon = - Balloon::new(0, *deflate_on_oom, *stats_interval, false, false).unwrap(); - assert_eq!(balloon.device_type(), VIRTIO_ID_BALLOON); - - let features: u64 = (1u64 << VIRTIO_F_VERSION_1) - | (u64::from(*deflate_on_oom) << VIRTIO_BALLOON_F_DEFLATE_ON_OOM) - | ((u64::from(*stats_interval)) << VIRTIO_BALLOON_F_STATS_VQ); - - assert_eq!( - balloon.avail_features_by_page(0), - (features & 0xFFFFFFFF) as u32 - ); - assert_eq!(balloon.avail_features_by_page(1), (features >> 32) as u32); - for i in 2..10 { - assert_eq!(balloon.avail_features_by_page(i), 0u32); - } + let combinations = iproduct!( + &[true, false], // Reporting + &[true, false], // Hinting + &[true, false], // Deflate + &[0, 1] // Interval + ); - for i in 0..10 { - balloon.ack_features_by_page(i, u32::MAX); - } - // Only present features should be acknowledged. - assert_eq!(balloon.acked_features, features); + for (reporting, hinting, deflate_on_oom, stats_interval) in combinations { + let mut balloon = + Balloon::new(0, *deflate_on_oom, *stats_interval, *hinting, *reporting).unwrap(); + assert_eq!(balloon.device_type(), VIRTIO_ID_BALLOON); + + let features: u64 = (1u64 << VIRTIO_F_VERSION_1) + | (u64::from(*deflate_on_oom) << VIRTIO_BALLOON_F_DEFLATE_ON_OOM) + | ((u64::from(*reporting)) << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) + | ((u64::from(*hinting)) << VIRTIO_BALLOON_F_FREE_PAGE_HINTING) + | ((u64::from(*stats_interval)) << VIRTIO_BALLOON_F_STATS_VQ); + + assert_eq!( + balloon.avail_features_by_page(0), + (features & 0xFFFFFFFF) as u32 + ); + assert_eq!(balloon.avail_features_by_page(1), (features >> 32) as u32); + for i in 2..10 { + assert_eq!(balloon.avail_features_by_page(i), 0u32); + } + + for i in 0..10 { + balloon.ack_features_by_page(i, u32::MAX); } + // Only present features should be acknowledged. 
+ assert_eq!(balloon.acked_features, features); } } @@ -1137,6 +1183,57 @@ pub(crate) mod tests { assert_eq!(actual_config_space, expected_config_space); } + #[test] + fn test_free_page_hinting_config() { + let mut balloon = Balloon::new(0, true, 0, true, false).unwrap(); + let mem = default_mem(); + let interrupt = default_interrupt(); + let infq = VirtQueue::new(GuestAddress(0), &mem, 16); + balloon.set_queue(INFLATE_INDEX, infq.create_queue()); + balloon.set_queue(DEFLATE_INDEX, infq.create_queue()); + balloon.set_queue(balloon.free_page_hinting_idx(), infq.create_queue()); + balloon.activate(mem.clone(), interrupt).unwrap(); + + let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [ + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + balloon.write_config(0, &expected_config_space); + + let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE]; + balloon.read_config(0, &mut actual_config_space); + assert_eq!(actual_config_space, expected_config_space); + + // We expect the cmd_id to be set to 2 now + balloon.start_hinting(Default::default()).unwrap(); + + let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [ + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + ]; + let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE]; + balloon.read_config(0, &mut actual_config_space); + assert_eq!(actual_config_space, expected_config_space); + + // We expect the cmd_id to be set to 1 + balloon.stop_hinting().unwrap(); + + let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [ + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + ]; + let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE]; + balloon.read_config(0, &mut actual_config_space); + assert_eq!(actual_config_space, expected_config_space); + + // We expect the cmd_id to be bumped up to 3 now + balloon.start_hinting(Default::default()).unwrap(); + + let expected_config_space: [u8; 
BALLOON_CONFIG_SPACE_SIZE] = [ + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + ]; + let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE]; + balloon.read_config(0, &mut actual_config_space); + assert_eq!(actual_config_space, expected_config_space); + } + #[test] fn test_invalid_request() { let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); @@ -1412,16 +1509,252 @@ pub(crate) mod tests { } } + #[test] + fn test_process_reporting() { + let mem = create_virtio_mem(); + let mut th = + VirtioTestHelper::::new(&mem, Balloon::new(0, true, 0, false, true).unwrap()); + + th.activate_device(&mem); + + let page_size = host_page_size() as u64; + + // This has to be u32 for the scatter gather + #[allow(clippy::cast_possible_truncation)] + let page_size_chain = page_size as u32; + let reporting_idx = th.device().free_page_reporting_idx(); + + let safe_addr = align_up(th.data_address(), page_size); + + th.add_scatter_gather(reporting_idx, 0, &[(0, safe_addr, page_size_chain, 0)]); + check_metric_after_block!( + METRICS.free_page_report_freed, + page_size, + invoke_handler_for_queue_event(&mut th.device(), reporting_idx) + ); + + // Test with multiple items + th.add_scatter_gather( + reporting_idx, + 0, + &[ + (0, safe_addr, page_size_chain, 0), + (1, safe_addr + page_size, page_size_chain, 0), + (2, safe_addr + (page_size * 2), page_size_chain, 0), + ], + ); + + check_metric_after_block!( + METRICS.free_page_report_freed, + page_size * 3, + invoke_handler_for_queue_event(&mut th.device(), reporting_idx) + ); + + // Test with unaligned address + th.add_scatter_gather(reporting_idx, 0, &[(1, safe_addr + 1, page_size_chain, 0)]); + + check_metric_after_block!( + METRICS.free_page_report_fails, + 1, + invoke_handler_for_queue_event(&mut th.device(), reporting_idx) + ); + } + + #[test] + fn test_process_hinting() { + let mem = create_virtio_mem(); + let mut th = + VirtioTestHelper::::new(&mem, Balloon::new(0, true, 0, true,
false).unwrap()); + + th.activate_device(&mem); + + let page_size = host_page_size() as u64; + let hinting_idx = th.device().free_page_hinting_idx(); + // This has to be u32 for the scatter gather + #[allow(clippy::cast_possible_truncation)] + let page_size_chain = page_size as u32; + let safe_addr = align_up(th.data_address(), page_size); + + macro_rules! start_hinting { + ($info:expr) => { + th.device().start_hinting($info).unwrap(); + assert!( + th.device() + .interrupt_trigger() + .has_pending_interrupt(VirtioInterruptType::Config) + ); + th.device() + .interrupt_trigger() + .ack_interrupt(VirtioInterruptType::Config); + }; + () => { + start_hinting!(Default::default()) + }; + } + + macro_rules! send_stop { + ($cmd: expr) => { + mem.write_obj($cmd, GuestAddress::new(safe_addr)).unwrap(); + th.add_scatter_gather( + hinting_idx, + 0, + &[ + (0, safe_addr, 4, VIRTQ_DESC_F_WRITE), + ( + 1, + safe_addr + page_size, + page_size_chain, + VIRTQ_DESC_F_WRITE, + ), + ], + ); + check_metric_after_block!( + METRICS.free_page_hint_freed, + 0, + th.device().process_free_page_hinting_queue() + ); + th.device() + .interrupt_trigger() + .ack_interrupt(VirtioInterruptType::Queue(hinting_idx.try_into().unwrap())); + th.device() + .interrupt_trigger() + .ack_interrupt(VirtioInterruptType::Config); + }; + () => { + send_stop!(FREE_PAGE_HINT_STOP) + }; + } + + macro_rules! 
test_hinting { + ($cmd:expr, $expected:expr) => { + mem.write_obj($cmd as u32, GuestAddress::new(safe_addr)) + .unwrap(); + th.add_scatter_gather( + hinting_idx, + 0, + &[ + (0, safe_addr, 4, VIRTQ_DESC_F_WRITE), + ( + 1, + safe_addr + page_size, + page_size_chain, + VIRTQ_DESC_F_WRITE, + ), + ], + ); + + check_metric_after_block!( + METRICS.free_page_hint_freed, + $expected, + invoke_handler_for_queue_event(&mut th.device(), hinting_idx) + ); + }; + + ($expected:expr) => { + th.add_scatter_gather( + hinting_idx, + 0, + &[( + 0, + safe_addr + page_size, + page_size_chain, + VIRTQ_DESC_F_WRITE, + )], + ); + + check_metric_after_block!( + METRICS.free_page_hint_freed, + $expected, + invoke_handler_for_queue_event(&mut th.device(), hinting_idx) + ); + }; + } + + // Report a page before a cmd_id has even been negotiated + test_hinting!(2, 0); + + // Test the good case + start_hinting!(); + + let mut host_cmd = th.device().get_hinting_status().unwrap().host_cmd; + + // Ack the start of the hinting run and send a single page + test_hinting!(host_cmd, page_size); + + // Report pages for an invalid cmd + test_hinting!(host_cmd + 1, 0); + + // If correct cmd is again used continue again + test_hinting!(host_cmd, page_size); + + // Trigger another hinting run this will bump the cmd id + // so we should ignore any inflight requests + start_hinting!(); + test_hinting!(0); + + // Update to our new host cmd and check this now works + host_cmd = th.device().get_hinting_status().unwrap().host_cmd; + test_hinting!(host_cmd, page_size); + test_hinting!(page_size); + + // Simulate the driver finishing a run. 
Any reported values after + // should be ignored + send_stop!(); + // Test we handle invalid cmd from driver + send_stop!(FREE_PAGE_HINT_DONE); + test_hinting!(0); + + // As we had auto ack on finish the host cmd should be set to done + host_cmd = th.device().get_hinting_status().unwrap().host_cmd; + assert_eq!(host_cmd, FREE_PAGE_HINT_DONE); + + // Test no ack on stop behaviour + start_hinting!(StartHintingCmd { + acknowledge_on_stop: false, + }); + + host_cmd = th.device().get_hinting_status().unwrap().host_cmd; + test_hinting!(host_cmd, page_size); + test_hinting!(page_size); + + send_stop!(); + let new_host_cmd = th.device().get_hinting_status().unwrap().host_cmd; + assert_eq!(host_cmd, new_host_cmd); + + // Test misaligned writes report as an error + start_hinting!(Default::default()); + + host_cmd = th.device().get_hinting_status().unwrap().host_cmd; + test_hinting!(host_cmd, page_size); + test_hinting!(page_size); + + th.add_scatter_gather( + hinting_idx, + 0, + &[(0, safe_addr + page_size + 1, page_size_chain, 0)], + ); + + check_metric_after_block!( + METRICS.free_page_hint_fails, + 1, + th.device().process_free_page_hinting_queue().unwrap() + ); + } + #[test] fn test_process_balloon_queues() { - let mut balloon = Balloon::new(0x10, true, 0, false, false).unwrap(); + let mut balloon = Balloon::new(0x10, true, 0, true, true).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); let defq = VirtQueue::new(GuestAddress(0), &mem, 16); + let hintq = VirtQueue::new(GuestAddress(0), &mem, 16); + let reportq = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, infq.create_queue()); balloon.set_queue(DEFLATE_INDEX, defq.create_queue()); + balloon.set_queue(balloon.free_page_hinting_idx(), hintq.create_queue()); + balloon.set_queue(balloon.free_page_reporting_idx(), reportq.create_queue()); balloon.activate(mem, interrupt).unwrap(); 
balloon.process_virtio_queues().unwrap(); @@ -1463,6 +1796,9 @@ pub(crate) mod tests { let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); // Assert that we can't update an inactive device. balloon.update_size(1).unwrap_err(); + balloon.start_hinting(Default::default()).unwrap_err(); + balloon.get_hinting_status().unwrap_err(); + balloon.stop_hinting().unwrap_err(); } #[test] diff --git a/src/vmm/src/devices/virtio/balloon/test_utils.rs b/src/vmm/src/devices/virtio/balloon/test_utils.rs index 2665d5dbd87..d4a0f629d65 100644 --- a/src/vmm/src/devices/virtio/balloon/test_utils.rs +++ b/src/vmm/src/devices/virtio/balloon/test_utils.rs @@ -14,13 +14,22 @@ pub fn invoke_handler_for_queue_event(b: &mut Balloon, queue_index: usize) { use crate::devices::virtio::balloon::{DEFLATE_INDEX, INFLATE_INDEX, STATS_INDEX}; use crate::devices::virtio::transport::VirtioInterruptType; + let hinting_idx = b.free_page_hinting_idx(); + let reporting_idx = b.free_page_reporting_idx(); + assert!(queue_index < BALLOON_NUM_QUEUES); // Trigger the queue event. b.queue_evts[queue_index].write(1).unwrap(); // Handle event. 
+ // Reporting -> hinting -> stats ordering is important as they will change + // depending on enabled features match queue_index { INFLATE_INDEX => b.process_inflate_queue_event().unwrap(), DEFLATE_INDEX => b.process_deflate_queue_event().unwrap(), + reporting_idx if b.free_page_reporting() => { + b.process_free_page_reporting_queue_event().unwrap() + } + hinting_idx if b.free_page_hinting() => b.process_free_page_hinting_queue_event().unwrap(), STATS_INDEX => b.process_stats_queue_event().unwrap(), _ => unreachable!(), }; @@ -30,6 +39,8 @@ pub fn invoke_handler_for_queue_event(b: &mut Balloon, queue_index: usize) { interrupt .has_pending_interrupt(VirtioInterruptType::Queue(queue_index.try_into().unwrap())) ); + + interrupt.ack_interrupt(VirtioInterruptType::Queue(queue_index.try_into().unwrap())); } pub fn set_request(queue: &VirtQueue, idx: u16, addr: u64, len: u32, flags: u16) { diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs index 1f2ce079aed..2f9efd80909 100644 --- a/src/vmm/src/devices/virtio/rng/device.rs +++ b/src/vmm/src/devices/virtio/rng/device.rs @@ -339,7 +339,7 @@ mod tests { self.queues = queues; } - fn num_queues() -> usize { + fn num_queues(&self) -> usize { RNG_NUM_QUEUES } } diff --git a/src/vmm/src/devices/virtio/test_utils.rs b/src/vmm/src/devices/virtio/test_utils.rs index 6f1489dd380..0922cfe32f2 100644 --- a/src/vmm/src/devices/virtio/test_utils.rs +++ b/src/vmm/src/devices/virtio/test_utils.rs @@ -21,7 +21,7 @@ macro_rules! 
check_metric_after_block { ($metric:expr, $delta:expr, $block:expr) => {{ let before = $metric.count(); let _ = $block; - assert_eq!($metric.count(), before + $delta, "unexpected metric value"); + assert_eq!($metric.count() - before, $delta, "unexpected metric value"); }}; } @@ -345,7 +345,7 @@ pub(crate) mod test { /// Replace the queues used by the device fn set_queues(&mut self, queues: Vec); /// Number of queues this device supports - fn num_queues() -> usize; + fn num_queues(&self) -> usize; } /// A helper type to allow testing VirtIO devices @@ -401,7 +401,7 @@ pub(crate) mod test { pub fn new(mem: &'a GuestMemoryMmap, mut device: T) -> VirtioTestHelper<'a, T> { let mut event_manager = EventManager::new().unwrap(); - let virtqueues = Self::create_virtqueues(mem, T::num_queues()); + let virtqueues = Self::create_virtqueues(mem, device.num_queues()); let queues = virtqueues.iter().map(|vq| vq.create_queue()).collect(); device.set_queues(queues); let device = Arc::new(Mutex::new(device)); @@ -442,6 +442,52 @@ pub(crate) mod test { self.virtqueues.last().unwrap().end().raw_value() } + /// Add a new Descriptor in one of the device's queues in the form of scatter gather + /// + /// This function adds in one of the queues of the device a DescriptorChain at some offset + /// in the "data range" of the guest memory. The number of descriptors to create is passed + /// as a list of descriptors (a tuple of (index, addr, length, flags)). + /// + /// The total size of the buffer is the sum of all lengths of this list of descriptors. + /// The first descriptor will be stored at `self.data_address() + addr_offset`. Subsequent + /// descriptors will be placed at random addresses after that.
+ /// + /// # Arguments + /// + /// * `queue` - The index of the device queue to use + /// * `addr_offset` - Offset within the data region where to put the first descriptor + /// * `desc_list` - List of descriptors to create in the chain + pub fn add_scatter_gather( + &mut self, + queue: usize, + addr_offset: u64, + desc_list: &[(u16, u64, u32, u16)], + ) { + let device = self.device.lock().unwrap(); + + let event_fd = &device.queue_events()[queue]; + let vq = &self.virtqueues[queue]; + + // Create the descriptor chain + let mut iter = desc_list.iter().peekable(); + while let Some(&(index, addr, len, flags)) = iter.next() { + let desc = &vq.dtable[index as usize]; + desc.set(addr, len, flags, 0); + if let Some(&&(next_index, _, _, _)) = iter.peek() { + desc.flags.set(flags | VIRTQ_DESC_F_NEXT); + desc.next.set(next_index); + } + } + + // Mark the chain as available. + if let Some(&(index, _, _, _)) = desc_list.first() { + let ring_index = vq.avail.idx.get(); + vq.avail.ring[ring_index as usize].set(index); + vq.avail.idx.set(ring_index + 1); + } + event_fd.write(1).unwrap(); + } + /// Add a new Descriptor in one of the device's queues /// /// This function adds in one of the queues of the device a DescriptorChain at some offset diff --git a/src/vmm/src/devices/virtio/transport/mmio.rs b/src/vmm/src/devices/virtio/transport/mmio.rs index f5039281f16..d98dd4ce365 100644 --- a/src/vmm/src/devices/virtio/transport/mmio.rs +++ b/src/vmm/src/devices/virtio/transport/mmio.rs @@ -439,9 +439,17 @@ impl VirtioInterrupt for IrqTrigger { (VIRTIO_MMIO_INT_CONFIG, IrqType::Config) | (VIRTIO_MMIO_INT_VRING, IrqType::Vring) ); } - false } + + #[cfg(test)] + fn ack_interrupt(&self, interrupt_type: VirtioInterruptType) { + let irq = match interrupt_type { + VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, + VirtioInterruptType::Queue(_) => VIRTIO_MMIO_INT_VRING, + }; + self.irq_status.fetch_and(!irq, Ordering::SeqCst); + } } impl IrqTrigger { diff --git 
a/src/vmm/src/devices/virtio/transport/mod.rs b/src/vmm/src/devices/virtio/transport/mod.rs index 41d0730dfe0..2a87a4baa18 100644 --- a/src/vmm/src/devices/virtio/transport/mod.rs +++ b/src/vmm/src/devices/virtio/transport/mod.rs @@ -50,4 +50,8 @@ pub trait VirtioInterrupt: std::fmt::Debug + Send + Sync { /// Returns true if there is any pending interrupt #[cfg(test)] fn has_pending_interrupt(&self, interrupt_type: VirtioInterruptType) -> bool; + + /// Used to acknowledge an interrupt + #[cfg(test)] + fn ack_interrupt(&self, interrupt_type: VirtioInterruptType); } diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 4b48562ea25..f0cc8bdefc7 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -725,6 +725,11 @@ impl VirtioInterrupt for VirtioInterruptMsix { fn has_pending_interrupt(&self, interrupt_type: VirtioInterruptType) -> bool { false } + + #[cfg(test)] + fn ack_interrupt(&self, interrupt_type: VirtioInterruptType) { + // Do nothing here + } } impl PciDevice for VirtioPciDevice { From 8dcf4fdbeb6fdf8a788d9e5019df3480411e7c68 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Fri, 24 Oct 2025 11:31:05 +0000 Subject: [PATCH 8/8] virtio-balloon: Drop huge page restriction While the traditional balloon device would not be able to reclaim memory when backed by huge pages, it could still technically be used to restrict memory usage in the guest. With the addition of hinting and reporting, they report ranges in bigger sizes (4mb by default). Because of this, it is possible for the host to reclaim huge pages backing the guest. Updates the performance tests for the balloon when backed by huge pages, added variants to the size reduction tests to ensure hinting and reporting can reduce the RSS of the guest. Move the inflation test to performance to ensure it runs sequentially in CI otherwise the host can be exhausted of huge pages.
Signed-off-by: Jack Thomson --- src/vmm/src/resources.rs | 41 +--- src/vmm/src/vmm_config/balloon.rs | 2 - src/vmm/src/vmm_config/machine_config.rs | 2 - tests/conftest.py | 11 +- tests/framework/microvm.py | 10 +- tests/framework/utils.py | 49 +++++ .../functional/test_balloon.py | 187 ++++-------------- .../performance/test_balloon.py | 116 ++++++++++- .../performance/test_huge_pages.py | 23 --- 9 files changed, 217 insertions(+), 224 deletions(-) diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 46348201c94..08fdcc100bc 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -23,9 +23,7 @@ use crate::vmm_config::boot_source::{ use crate::vmm_config::drive::*; use crate::vmm_config::entropy::*; use crate::vmm_config::instance_info::InstanceInfo; -use crate::vmm_config::machine_config::{ - HugePageConfig, MachineConfig, MachineConfigError, MachineConfigUpdate, -}; +use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError, init_metrics}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::*; @@ -245,10 +243,6 @@ impl VmResources { } SharedDeviceType::Balloon(balloon) => { self.balloon.set_device(balloon); - - if self.machine_config.huge_pages != HugePageConfig::None { - return Err(ResourcesError::BalloonDevice(BalloonConfigError::HugePages)); - } } SharedDeviceType::Vsock(vsock) => { self.vsock.set_device(vsock); @@ -290,9 +284,6 @@ impl VmResources { return Err(MachineConfigError::IncompatibleBalloonSize); } - if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None { - return Err(MachineConfigError::BalloonAndHugePages); - } self.machine_config = updated; Ok(()) @@ -349,10 +340,6 @@ impl VmResources { return Err(BalloonConfigError::TooManyPagesRequested); } - if self.machine_config.huge_pages != HugePageConfig::None { - return Err(BalloonConfigError::HugePages); - } 
- self.balloon.set(config) } @@ -563,7 +550,6 @@ mod tests { use crate::HTTP_MAX_PAYLOAD_SIZE; use crate::cpu_config::templates::test_utils::TEST_TEMPLATE_JSON; use crate::cpu_config::templates::{CpuTemplateType, StaticCpuTemplate}; - use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::virtio::VirtioBlockError; use crate::devices::virtio::block::{BlockError, CacheType}; use crate::devices::virtio::vsock::VSOCK_DEV_ID; @@ -1543,31 +1529,6 @@ mod tests { .unwrap_err(); } - #[test] - fn test_negative_restore_balloon_device_with_huge_pages() { - let mut vm_resources = default_vm_resources(); - vm_resources.balloon = BalloonBuilder::new(); - vm_resources - .update_machine_config(&MachineConfigUpdate { - huge_pages: Some(HugePageConfig::Hugetlbfs2M), - ..Default::default() - }) - .unwrap(); - let err = vm_resources - .update_from_restored_device(SharedDeviceType::Balloon(Arc::new(Mutex::new( - Balloon::new(128, false, 0, false, false).unwrap(), - )))) - .unwrap_err(); - assert!( - matches!( - err, - ResourcesError::BalloonDevice(BalloonConfigError::HugePages) - ), - "{:?}", - err - ); - } - #[test] fn test_set_entropy_device() { let mut vm_resources = default_vm_resources(); diff --git a/src/vmm/src/vmm_config/balloon.rs b/src/vmm/src/vmm_config/balloon.rs index 64003bdf9fd..7151a68e99f 100644 --- a/src/vmm/src/vmm_config/balloon.rs +++ b/src/vmm/src/vmm_config/balloon.rs @@ -20,8 +20,6 @@ pub enum BalloonConfigError { TooManyPagesRequested, /// Error creating the balloon device: {0} CreateFailure(crate::devices::virtio::balloon::BalloonError), - /// Firecracker's huge pages support is incompatible with memory ballooning. 
- HugePages, } /// This struct represents the strongly typed equivalent of the json body diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index cfe7105fdf8..e337a5a9dcd 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -29,8 +29,6 @@ pub enum MachineConfigError { SmtNotSupported, /// Could not determine host kernel version when checking hugetlbfs compatibility KernelVersion, - /// Firecracker's huge pages support is incompatible with memory ballooning. - BalloonAndHugePages, } /// Describes the possible (huge)page configurations for a microVM's memory. diff --git a/tests/conftest.py b/tests/conftest.py index fabff84a0d8..c54924b0864 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,7 +34,7 @@ from framework import defs, utils from framework.artifacts import disks, kernel_params from framework.defs import DEFAULT_BINARY_DIR -from framework.microvm import MicroVMFactory, SnapshotType +from framework.microvm import HugePagesConfig, MicroVMFactory, SnapshotType from framework.properties import global_props from framework.utils_cpu_templates import ( custom_cpu_templates_params, @@ -594,6 +594,15 @@ def pci_enabled(request): yield request.param +@pytest.fixture( + params=[HugePagesConfig.NONE, HugePagesConfig.HUGETLBFS_2MB], + ids=["NO_HUGE_PAGES", "2M_HUGE_PAGES"], +) +def huge_pages(request): + """Fixture that allows configuring whether a microVM will have huge pages enabled or not""" + yield request.param + + def uvm_booted( microvm_factory, guest_kernel, diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 74ae180950c..810cb412fa0 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -23,10 +23,11 @@ from collections import namedtuple from dataclasses import dataclass from enum import Enum, auto -from functools import lru_cache +from functools import cached_property, lru_cache from pathlib import Path from 
typing import Optional +import psutil from tenacity import Retrying, retry, stop_after_attempt, wait_fixed import host_tools.cargo_build as build_tools @@ -472,7 +473,7 @@ def state(self): """Get the InstanceInfo property and return the state field.""" return self.api.describe.get().json()["state"] - @property + @cached_property def firecracker_pid(self): """Return Firecracker's PID @@ -491,6 +492,11 @@ def firecracker_pid(self): with attempt: return int(self.jailer.pid_file.read_text(encoding="ascii")) + @cached_property + def ps(self): + """Returns a handle to the psutil.Process for this VM""" + return psutil.Process(self.firecracker_pid) + @property def dimensions(self): """Gets a default set of cloudwatch dimensions describing the configuration of this microvm""" diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 64bc9526e5c..a70eff5ad56 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -14,10 +14,12 @@ import typing from collections import defaultdict, namedtuple from contextlib import contextmanager +from pathlib import Path from typing import Dict import psutil import semver +from packaging import version from tenacity import ( Retrying, retry, @@ -259,6 +261,48 @@ def get_free_mem_ssh(ssh_connection): raise Exception("Available memory not found in `/proc/meminfo") +def get_stable_rss_mem_by_pid(process, percentage_delta=1): + """ + Get the RSS memory that a guest uses, given the pid of the guest. + + Wait till the fluctuations in RSS drop below percentage_delta. + Or print a warning if this does not happen. 
+ """ + + # All values are reported as KiB + + def get_rss_from_pmap(): + """Returns current memory utilization in KiB, including used HugeTLBFS""" + + proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8") + for line in proc_status.splitlines(): + if line.startswith("HugetlbPages:"): # entry is in KiB + hugetlbfs_usage = int(line.split()[1]) + break + else: + assert False, f"HugetlbPages not found in {str(proc_status)}" + return hugetlbfs_usage + process.memory_info().rss // 1024 + + first_rss = 0 + second_rss = 0 + for _ in range(5): + first_rss = get_rss_from_pmap() + time.sleep(1) + second_rss = get_rss_from_pmap() + abs_diff = abs(first_rss - second_rss) + abs_delta = abs_diff / first_rss * 100 + print( + f"RSS readings: old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}" + ) + if abs_delta < percentage_delta: + return second_rss + + time.sleep(1) + + print("WARNING: RSS readings did not stabilize") + return second_rss + + def _format_output_message(proc, stdout, stderr): output_message = f"\n[{proc.pid}] Command:\n{proc.args}" # Append stdout/stderr to the output message @@ -417,6 +461,11 @@ def get_kernel_version(level=2): return linux_version +def supports_hugetlbfs_discard(): + """Returns True if the kernel supports hugetlbfs discard""" + return version.parse(get_kernel_version()) >= version.parse("5.18.0") + + def generate_mmds_session_token( ssh_connection, ipv4_address, token_ttl, imds_compat=False ): diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py index 7dbf86e3c0a..523abff624d 100644 --- a/tests/integration_tests/functional/test_balloon.py +++ b/tests/integration_tests/functional/test_balloon.py @@ -10,45 +10,11 @@ import pytest import requests -from framework.utils import check_output, get_free_mem_ssh +from framework.utils import get_free_mem_ssh, get_stable_rss_mem_by_pid STATS_POLLING_INTERVAL_S = 1 -def 
get_stable_rss_mem_by_pid(pid, percentage_delta=1): - """ - Get the RSS memory that a guest uses, given the pid of the guest. - - Wait till the fluctuations in RSS drop below percentage_delta. - Or print a warning if this does not happen. - """ - - # All values are reported as KiB - - def get_rss_from_pmap(): - _, output, _ = check_output("pmap -X {}".format(pid)) - return int(output.split("\n")[-2].split()[1], 10) - - first_rss = 0 - second_rss = 0 - for _ in range(5): - first_rss = get_rss_from_pmap() - time.sleep(1) - second_rss = get_rss_from_pmap() - abs_diff = abs(first_rss - second_rss) - abs_delta = abs_diff / first_rss * 100 - print( - f"RSS readings: old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}" - ) - if abs_delta < percentage_delta: - return second_rss - - time.sleep(1) - - print("WARNING: RSS readings did not stabilize") - return second_rss - - def lower_ssh_oom_chance(ssh_connection): """Lure OOM away from ssh process""" logger = logging.getLogger("lower_ssh_oom_chance") @@ -88,25 +54,25 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32): def _test_rss_memory_lower(test_microvm): """Check inflating the balloon makes guest use less rss memory.""" # Get the firecracker pid, and open an ssh connection. - firecracker_pid = test_microvm.firecracker_pid + firecracker_ps = test_microvm.ps ssh_connection = test_microvm.ssh # Using deflate_on_oom, get the RSS as low as possible test_microvm.api.balloon.patch(amount_mib=200) # Get initial rss consumption. - init_rss = get_stable_rss_mem_by_pid(firecracker_pid) + init_rss = get_stable_rss_mem_by_pid(firecracker_ps) # Get the balloon back to 0. test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Dirty memory, then inflate balloon and get ballooned rss consumption. 
make_guest_dirty_memory(ssh_connection, amount_mib=32) test_microvm.api.balloon.patch(amount_mib=200) - balloon_rss = get_stable_rss_mem_by_pid(firecracker_pid) + balloon_rss = get_stable_rss_mem_by_pid(firecracker_ps) # Check that the ballooning reclaimed the memory. assert balloon_rss - init_rss <= 15000 @@ -150,7 +116,7 @@ def test_inflate_reduces_free(uvm_plain_any): # Start the microvm test_microvm.start() - firecracker_pid = test_microvm.firecracker_pid + firecracker_ps = test_microvm.ps # Get the free memory before ballooning. available_mem_deflated = get_free_mem_ssh(test_microvm.ssh) @@ -158,7 +124,7 @@ def test_inflate_reduces_free(uvm_plain_any): # Inflate 64 MB == 16384 page balloon. test_microvm.api.balloon.patch(amount_mib=64) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Get the free memory after ballooning. available_mem_inflated = get_free_mem_ssh(test_microvm.ssh) @@ -196,19 +162,19 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom): # Start the microvm. test_microvm.start() - firecracker_pid = test_microvm.firecracker_pid + firecracker_ps = test_microvm.ps # We get an initial reading of the RSS, then calculate the amount # we need to inflate the balloon with by subtracting it from the # VM size and adding an offset of 50 MiB in order to make sure we # get a lower reading than the initial one. - initial_rss = get_stable_rss_mem_by_pid(firecracker_pid) + initial_rss = get_stable_rss_mem_by_pid(firecracker_ps) inflate_size = 256 - (int(initial_rss / 1024) + 50) # Inflate the balloon test_microvm.api.balloon.patch(amount_mib=inflate_size) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Check that using memory leads to the balloon device automatically # deflate (or not). 
@@ -251,39 +217,39 @@ def test_reinflate_balloon(uvm_plain_any): # Start the microvm. test_microvm.start() - firecracker_pid = test_microvm.firecracker_pid + firecracker_ps = test_microvm.ps # First inflate the balloon to free up the uncertain amount of memory # used by the kernel at boot and establish a baseline, then give back # the memory. test_microvm.api.balloon.patch(amount_mib=200) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Get the guest to dirty memory. make_guest_dirty_memory(test_microvm.ssh, amount_mib=32) - first_reading = get_stable_rss_mem_by_pid(firecracker_pid) + first_reading = get_stable_rss_mem_by_pid(firecracker_ps) # Now inflate the balloon. test_microvm.api.balloon.patch(amount_mib=200) - second_reading = get_stable_rss_mem_by_pid(firecracker_pid) + second_reading = get_stable_rss_mem_by_pid(firecracker_ps) # Now deflate the balloon. test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Now have the guest dirty memory again. make_guest_dirty_memory(test_microvm.ssh, amount_mib=32) - third_reading = get_stable_rss_mem_by_pid(firecracker_pid) + third_reading = get_stable_rss_mem_by_pid(firecracker_ps) # Now inflate the balloon again. 
test_microvm.api.balloon.patch(amount_mib=200) - fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid) + fourth_reading = get_stable_rss_mem_by_pid(firecracker_ps) # Check that the memory used is the same after regardless of the previous # inflate history of the balloon (with the third reading being allowed @@ -293,79 +259,6 @@ def test_reinflate_balloon(uvm_plain_any): assert abs(second_reading - fourth_reading) <= 20000 -# pylint: disable=C0103 -@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) -def test_size_reduction(uvm_plain_any, method): - """ - Verify that ballooning reduces RSS usage on a newly booted guest. - """ - test_microvm = uvm_plain_any - test_microvm.spawn() - test_microvm.basic_config() - test_microvm.add_net_iface() - - traditional_balloon = method == "traditional" - free_page_reporting = method == "reporting" - free_page_hinting = method == "hinting" - - # Add a memory balloon. - test_microvm.api.balloon.put( - amount_mib=0, - deflate_on_oom=True, - stats_polling_interval_s=0, - free_page_reporting=free_page_reporting, - free_page_hinting=free_page_hinting, - ) - - # Start the microvm. - test_microvm.start() - firecracker_pid = test_microvm.firecracker_pid - - get_stable_rss_mem_by_pid(firecracker_pid) - - test_microvm.ssh.check_output( - "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 /proc/sys/vm/drop_caches") - time.sleep(2) - - # We take the initial reading of the RSS, then calculate the amount - # we need to inflate the balloon with by subtracting it from the - # VM size and adding an offset of 10 MiB in order to make sure we - # get a lower reading than the initial one. 
- inflate_size = 256 - int(first_reading / 1024) + 10 - - if traditional_balloon: - # Now inflate the balloon - test_microvm.api.balloon.patch(amount_mib=inflate_size) - elif free_page_hinting: - test_microvm.api.balloon_hinting_start.patch() - - _ = get_stable_rss_mem_by_pid(firecracker_pid) - - if traditional_balloon: - # Deflate the balloon completely. - test_microvm.api.balloon.patch(amount_mib=0) - - # Check memory usage again. - second_reading = get_stable_rss_mem_by_pid(firecracker_pid) - - # There should be a reduction of at least 10MB. - assert first_reading - second_reading >= 10000 - - # pylint: disable=C0103 def test_stats(uvm_plain_any): """ @@ -385,7 +278,7 @@ def test_stats(uvm_plain_any): # Start the microvm. test_microvm.start() - firecracker_pid = test_microvm.firecracker_pid + firecracker_ps = test_microvm.ps # Give Firecracker enough time to poll the stats at least once post-boot time.sleep(STATS_POLLING_INTERVAL_S * 2) @@ -403,7 +296,7 @@ def test_stats(uvm_plain_any): make_guest_dirty_memory(test_microvm.ssh, amount_mib=10) time.sleep(1) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Make sure that the stats catch the page faults. after_workload_stats = test_microvm.api.balloon_stats.get().json() @@ -412,7 +305,7 @@ def test_stats(uvm_plain_any): # Now inflate the balloon with 10MB of pages. test_microvm.api.balloon.patch(amount_mib=10) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Get another reading of the stats after the polling interval has passed. inflated_stats = test_microvm.api.balloon_stats.get().json() @@ -425,7 +318,7 @@ def test_stats(uvm_plain_any): # available memory. test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. 
- _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Get another reading of the stats after the polling interval has passed. deflated_stats = test_microvm.api.balloon_stats.get().json() @@ -453,13 +346,13 @@ def test_stats_update(uvm_plain_any): # Start the microvm. test_microvm.start() - firecracker_pid = test_microvm.firecracker_pid + firecracker_ps = test_microvm.ps # Dirty 30MB of pages. make_guest_dirty_memory(test_microvm.ssh, amount_mib=30) # This call will internally wait for rss to become stable. - _ = get_stable_rss_mem_by_pid(firecracker_pid) + _ = get_stable_rss_mem_by_pid(firecracker_ps) # Get an initial reading of the stats. initial_stats = test_microvm.api.balloon_stats.get().json() @@ -510,16 +403,16 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory): time.sleep(1) # Get the firecracker pid, and open an ssh connection. - firecracker_pid = vm.firecracker_pid + firecracker_ps = vm.ps # Check memory usage. - first_reading = get_stable_rss_mem_by_pid(firecracker_pid) + first_reading = get_stable_rss_mem_by_pid(firecracker_ps) # Now inflate the balloon with 20MB of pages. vm.api.balloon.patch(amount_mib=20) # Check memory usage again. - second_reading = get_stable_rss_mem_by_pid(firecracker_pid) + second_reading = get_stable_rss_mem_by_pid(firecracker_ps) # There should be a reduction in RSS, but it's inconsistent. # We only test that the reduction happens. @@ -529,27 +422,27 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory): microvm = microvm_factory.build_from_snapshot(snapshot) # Get the firecracker from snapshot pid, and open an ssh connection. - firecracker_pid = microvm.firecracker_pid + firecracker_ps = microvm.ps # Wait out the polling interval, then get the updated stats. time.sleep(STATS_POLLING_INTERVAL_S * 2) stats_after_snap = microvm.api.balloon_stats.get().json() # Check memory usage. 
- third_reading = get_stable_rss_mem_by_pid(firecracker_pid) + third_reading = get_stable_rss_mem_by_pid(firecracker_ps) # Dirty 60MB of pages. make_guest_dirty_memory(microvm.ssh, amount_mib=60) # Check memory usage. - fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid) + fourth_reading = get_stable_rss_mem_by_pid(firecracker_ps) assert fourth_reading > third_reading # Inflate the balloon with another 20MB of pages. microvm.api.balloon.patch(amount_mib=40) - fifth_reading = get_stable_rss_mem_by_pid(firecracker_pid) + fifth_reading = get_stable_rss_mem_by_pid(firecracker_ps) # There should be a reduction in RSS, but it's inconsistent. # We only test that the reduction happens. @@ -600,10 +493,10 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): time.sleep(1) # Get the firecracker pid, and open an ssh connection. - firecracker_pid = vm.firecracker_pid + firecracker_ps = vm.ps # Check memory usage. - first_reading = get_stable_rss_mem_by_pid(firecracker_pid) + first_reading = get_stable_rss_mem_by_pid(firecracker_ps) _, pid, _ = vm.ssh.check_output("pidof fast_page_fault_helper") # Kill the application which will free the held memory @@ -614,7 +507,7 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): vm.api.balloon_hinting_start.patch() # Check memory usage again. - second_reading = get_stable_rss_mem_by_pid(firecracker_pid) + second_reading = get_stable_rss_mem_by_pid(firecracker_ps) # There should be a reduction in RSS, but it's inconsistent. # We only test that the reduction happens. 
@@ -623,7 +516,7 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): snapshot = vm.snapshot_full() microvm = microvm_factory.build_from_snapshot(snapshot) - firecracker_pid = microvm.firecracker_pid + firecracker_ps = microvm.ps microvm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 0 and (i + 1 < runs): time.sleep(sleep_duration) + + +# pylint: disable=C0103 +@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) +def test_size_reduction(uvm_plain_any, method, huge_pages): + """ + Verify that ballooning reduces RSS usage on a newly booted guest. + """ + traditional_balloon = method == "traditional" + free_page_reporting = method == "reporting" + free_page_hinting = method == "hinting" + + if huge_pages != HugePagesConfig.NONE: + if not supports_hugetlbfs_discard(): + pytest.skip("Host does not support hugetlb discard") + + if traditional_balloon: + pytest.skip("Traditional balloon device won't reduce RSS") + + test_microvm = uvm_plain_any + test_microvm.spawn() + test_microvm.basic_config(huge_pages=huge_pages) + test_microvm.add_net_iface() + + # Add a memory balloon. + test_microvm.api.balloon.put( + amount_mib=0, + deflate_on_oom=True, + stats_polling_interval_s=0, + free_page_reporting=free_page_reporting, + free_page_hinting=free_page_hinting, + ) + + # Start the microvm. + test_microvm.start() + firecracker_ps = test_microvm.ps + + get_stable_rss_mem_by_pid(firecracker_ps) + + test_microvm.ssh.check_output( + "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 /proc/sys/vm/drop_caches") + time.sleep(2) + + # We take the initial reading of the RSS, then calculate the amount + # we need to inflate the balloon with by subtracting it from the + # VM size and adding an offset of 10 MiB in order to make sure we + # get a lower reading than the initial one. 
+ inflate_size = 256 - int(first_reading / 1024) + 10 + + if traditional_balloon: + # Now inflate the balloon + test_microvm.api.balloon.patch(amount_mib=inflate_size) + elif free_page_hinting: + test_microvm.api.balloon_hinting_start.patch() + + _ = get_stable_rss_mem_by_pid(firecracker_ps) + + if traditional_balloon: + # Deflate the balloon completely. + test_microvm.api.balloon.patch(amount_mib=0) + + # Check memory usage again. + second_reading = get_stable_rss_mem_by_pid(firecracker_ps) + + # There should be a reduction of at least 10MB. + assert first_reading - second_reading >= 10000 diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py index 1c5a14873d1..e2c120bcb8b 100644 --- a/tests/integration_tests/performance/test_huge_pages.py +++ b/tests/integration_tests/performance/test_huge_pages.py @@ -175,26 +175,3 @@ def test_ept_violation_count( ) metrics.put_metric(metric, int(metric_value), "Count") - - -def test_negative_huge_pages_plus_balloon(uvm_plain): - """Tests that huge pages and memory ballooning cannot be used together""" - uvm_plain.memory_monitor = None - uvm_plain.spawn() - - # Ensure setting huge pages and then adding a balloon device doesn't work - uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB) - with pytest.raises( - RuntimeError, - match="Firecracker's huge pages support is incompatible with memory ballooning.", - ): - uvm_plain.api.balloon.put(amount_mib=0, deflate_on_oom=False) - - # Ensure adding a balloon device and then setting huge pages doesn't work - uvm_plain.basic_config(huge_pages=HugePagesConfig.NONE) - uvm_plain.api.balloon.put(amount_mib=0, deflate_on_oom=False) - with pytest.raises( - RuntimeError, - match="Machine config error: Firecracker's huge pages support is incompatible with memory ballooning.", - ): - uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)