Skip to content

Commit 5a1b473

Browse files
committed
virtio-balloon: Drop huge page restriction
While the traditional balloon device would not be able to reclaim memory when backed by huge pages, it could still technically be used to restrict memory usage in the guest. With the addition of hinting and reporting, ranges are reported in bigger sizes (4 MB by default). Because of this, it is possible for the host to reclaim huge pages backing the guest. Update the performance tests for the balloon when backed by huge pages, and add variants to the size reduction tests to ensure hinting and reporting can reduce the RSS of the guest. Move the inflation test to performance to ensure it runs sequentially in CI; otherwise the host can be exhausted of huge pages. Signed-off-by: Jack Thomson <[email protected]>
1 parent 7a28cb3 commit 5a1b473

File tree

9 files changed

+203
-224
lines changed

9 files changed

+203
-224
lines changed

src/vmm/src/resources.rs

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@ use crate::vmm_config::boot_source::{
2323
use crate::vmm_config::drive::*;
2424
use crate::vmm_config::entropy::*;
2525
use crate::vmm_config::instance_info::InstanceInfo;
26-
use crate::vmm_config::machine_config::{
27-
HugePageConfig, MachineConfig, MachineConfigError, MachineConfigUpdate,
28-
};
26+
use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate};
2927
use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError, init_metrics};
3028
use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3129
use crate::vmm_config::net::*;
@@ -245,10 +243,6 @@ impl VmResources {
245243
}
246244
SharedDeviceType::Balloon(balloon) => {
247245
self.balloon.set_device(balloon);
248-
249-
if self.machine_config.huge_pages != HugePageConfig::None {
250-
return Err(ResourcesError::BalloonDevice(BalloonConfigError::HugePages));
251-
}
252246
}
253247
SharedDeviceType::Vsock(vsock) => {
254248
self.vsock.set_device(vsock);
@@ -290,9 +284,6 @@ impl VmResources {
290284
return Err(MachineConfigError::IncompatibleBalloonSize);
291285
}
292286

293-
if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None {
294-
return Err(MachineConfigError::BalloonAndHugePages);
295-
}
296287
self.machine_config = updated;
297288

298289
Ok(())
@@ -349,10 +340,6 @@ impl VmResources {
349340
return Err(BalloonConfigError::TooManyPagesRequested);
350341
}
351342

352-
if self.machine_config.huge_pages != HugePageConfig::None {
353-
return Err(BalloonConfigError::HugePages);
354-
}
355-
356343
self.balloon.set(config)
357344
}
358345

@@ -563,7 +550,6 @@ mod tests {
563550
use crate::HTTP_MAX_PAYLOAD_SIZE;
564551
use crate::cpu_config::templates::test_utils::TEST_TEMPLATE_JSON;
565552
use crate::cpu_config::templates::{CpuTemplateType, StaticCpuTemplate};
566-
use crate::devices::virtio::balloon::Balloon;
567553
use crate::devices::virtio::block::virtio::VirtioBlockError;
568554
use crate::devices::virtio::block::{BlockError, CacheType};
569555
use crate::devices::virtio::vsock::VSOCK_DEV_ID;
@@ -1543,31 +1529,6 @@ mod tests {
15431529
.unwrap_err();
15441530
}
15451531

1546-
#[test]
1547-
fn test_negative_restore_balloon_device_with_huge_pages() {
1548-
let mut vm_resources = default_vm_resources();
1549-
vm_resources.balloon = BalloonBuilder::new();
1550-
vm_resources
1551-
.update_machine_config(&MachineConfigUpdate {
1552-
huge_pages: Some(HugePageConfig::Hugetlbfs2M),
1553-
..Default::default()
1554-
})
1555-
.unwrap();
1556-
let err = vm_resources
1557-
.update_from_restored_device(SharedDeviceType::Balloon(Arc::new(Mutex::new(
1558-
Balloon::new(128, false, 0, false, false).unwrap(),
1559-
))))
1560-
.unwrap_err();
1561-
assert!(
1562-
matches!(
1563-
err,
1564-
ResourcesError::BalloonDevice(BalloonConfigError::HugePages)
1565-
),
1566-
"{:?}",
1567-
err
1568-
);
1569-
}
1570-
15711532
#[test]
15721533
fn test_set_entropy_device() {
15731534
let mut vm_resources = default_vm_resources();

src/vmm/src/vmm_config/balloon.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ pub enum BalloonConfigError {
2020
TooManyPagesRequested,
2121
/// Error creating the balloon device: {0}
2222
CreateFailure(crate::devices::virtio::balloon::BalloonError),
23-
/// Firecracker's huge pages support is incompatible with memory ballooning.
24-
HugePages,
2523
}
2624

2725
/// This struct represents the strongly typed equivalent of the json body

src/vmm/src/vmm_config/machine_config.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ pub enum MachineConfigError {
2929
SmtNotSupported,
3030
/// Could not determine host kernel version when checking hugetlbfs compatibility
3131
KernelVersion,
32-
/// Firecracker's huge pages support is incompatible with memory ballooning.
33-
BalloonAndHugePages,
3432
}
3533

3634
/// Describes the possible (huge)page configurations for a microVM's memory.

tests/conftest.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from framework import defs, utils
3535
from framework.artifacts import disks, kernel_params
3636
from framework.defs import DEFAULT_BINARY_DIR
37-
from framework.microvm import MicroVMFactory, SnapshotType
37+
from framework.microvm import HugePagesConfig, MicroVMFactory, SnapshotType
3838
from framework.properties import global_props
3939
from framework.utils_cpu_templates import (
4040
custom_cpu_templates_params,
@@ -594,6 +594,15 @@ def pci_enabled(request):
594594
yield request.param
595595

596596

597+
@pytest.fixture(
598+
params=[HugePagesConfig.NONE, HugePagesConfig.HUGETLBFS_2MB],
599+
ids=["NO_HUGE_PAGES", "2M_HUGE_PAGES"],
600+
)
601+
def huge_pages(request):
602+
"""Fixture that allows configuring whether a microVM will have huge pages enabled or not"""
603+
yield request.param
604+
605+
597606
def uvm_booted(
598607
microvm_factory,
599608
guest_kernel,

tests/framework/microvm.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
from collections import namedtuple
2424
from dataclasses import dataclass
2525
from enum import Enum, auto
26-
from functools import lru_cache
26+
from functools import cached_property, lru_cache
2727
from pathlib import Path
2828
from typing import Optional
2929

30+
import psutil
3031
from tenacity import Retrying, retry, stop_after_attempt, wait_fixed
3132

3233
import host_tools.cargo_build as build_tools
@@ -472,7 +473,7 @@ def state(self):
472473
"""Get the InstanceInfo property and return the state field."""
473474
return self.api.describe.get().json()["state"]
474475

475-
@property
476+
@cached_property
476477
def firecracker_pid(self):
477478
"""Return Firecracker's PID
478479
@@ -491,6 +492,11 @@ def firecracker_pid(self):
491492
with attempt:
492493
return int(self.jailer.pid_file.read_text(encoding="ascii"))
493494

495+
@cached_property
496+
def ps(self):
497+
"""Returns a handle to the psutil.Process for this VM"""
498+
return psutil.Process(self.firecracker_pid)
499+
494500
@property
495501
def dimensions(self):
496502
"""Gets a default set of cloudwatch dimensions describing the configuration of this microvm"""

tests/framework/utils.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import typing
1515
from collections import defaultdict, namedtuple
1616
from contextlib import contextmanager
17+
from pathlib import Path
1718
from typing import Dict
1819

1920
import psutil
@@ -259,6 +260,48 @@ def get_free_mem_ssh(ssh_connection):
259260
raise Exception("Available memory not found in `/proc/meminfo")
260261

261262

263+
def get_stable_rss_mem_by_pid(process, percentage_delta=1):
264+
"""
265+
Get the RSS memory that a guest uses, given the pid of the guest.
266+
267+
Wait till the fluctuations in RSS drop below percentage_delta.
268+
Or print a warning if this does not happen.
269+
"""
270+
271+
# All values are reported as KiB
272+
273+
def get_rss_from_pmap():
274+
"""Returns current memory utilization in KiB, including used HugeTLBFS"""
275+
276+
proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8")
277+
for line in proc_status.splitlines():
278+
if line.startswith("HugetlbPages:"): # entry is in KiB
279+
hugetlbfs_usage = int(line.split()[1])
280+
break
281+
else:
282+
assert False, f"HugetlbPages not found in {str(proc_status)}"
283+
return hugetlbfs_usage + process.memory_info().rss // 1024
284+
285+
first_rss = 0
286+
second_rss = 0
287+
for _ in range(5):
288+
first_rss = get_rss_from_pmap()
289+
time.sleep(1)
290+
second_rss = get_rss_from_pmap()
291+
abs_diff = abs(first_rss - second_rss)
292+
abs_delta = abs_diff / first_rss * 100
293+
print(
294+
f"RSS readings: old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}"
295+
)
296+
if abs_delta < percentage_delta:
297+
return second_rss
298+
299+
time.sleep(1)
300+
301+
print("WARNING: RSS readings did not stabilize")
302+
return second_rss
303+
304+
262305
def _format_output_message(proc, stdout, stderr):
263306
output_message = f"\n[{proc.pid}] Command:\n{proc.args}"
264307
# Append stdout/stderr to the output message

0 commit comments

Comments
 (0)