Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 33 additions & 11 deletions resources/overlay/usr/local/bin/fast_page_fault_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <sys/mman.h> // mmap
#include <time.h> // clock_gettime
#include <fcntl.h> // open
#include <getopt.h> // getopt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: could you extract the update to the helper into its own commit and explain the changes there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, I will do!


#define MEM_SIZE_MIB (128 * 1024 * 1024)
#define NANOS_PER_SEC 1000000000
Expand All @@ -30,20 +31,39 @@ void touch_memory(void *mem, size_t size, char val) {

int main() {
sigset_t set;
int signal;
int signal, character;
void *ptr;
struct timespec start, end;
long duration_nanos;
FILE *out_file;

sigemptyset(&set);
if (sigaddset(&set, SIGUSR1) == -1) {
perror("sigaddset");
return 1;
char *options = 0;
int longindex = 0;
int signal_wait = 1;

struct option longopts[] = {
{"nosignal", no_argument, NULL, 's'},
{NULL, 0, NULL, 0}
};

while((character = getopt_long(argc, argv, "s", longopts, &longindex)) != -1) {
switch (character) {
case 's':
signal_wait = 0;
break;
}
}
if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
perror("sigprocmask");
return 1;

if (signal_wait) {
sigemptyset(&set);
if (sigaddset(&set, SIGUSR1) == -1) {
perror("sigaddset");
return 1;
}
if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
perror("sigprocmask");
return 1;
}
}

ptr = mmap(NULL, MEM_SIZE_MIB, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
Expand All @@ -53,9 +73,11 @@ int main() {
return 1;
}

touch_memory(ptr, MEM_SIZE_MIB, 1);
if (signal_wait) {
touch_memory(ptr, MEM_SIZE_MIB, 1);

sigwait(&set, &signal);
sigwait(&set, &signal);
}

clock_gettime(CLOCK_BOOTTIME, &start);
touch_memory(ptr, MEM_SIZE_MIB, 2);
Expand All @@ -76,4 +98,4 @@ int main() {
}

return 0;
}
}
160 changes: 145 additions & 15 deletions tests/integration_tests/functional/test_balloon.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""Tests for guest-side operations on /balloon resources."""

import logging
import signal
import time
from subprocess import TimeoutExpired

Expand Down Expand Up @@ -293,7 +294,8 @@ def test_reinflate_balloon(uvm_plain_any):


# pylint: disable=C0103
def test_size_reduction(uvm_plain_any):
@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"])
def test_size_reduction(uvm_plain_any, method):
"""
Verify that ballooning reduces RSS usage on a newly booted guest.
"""
Expand All @@ -302,30 +304,60 @@ def test_size_reduction(uvm_plain_any):
test_microvm.basic_config()
test_microvm.add_net_iface()

traditional_balloon = method == "traditional"
free_page_reporting = method == "reporting"
free_page_hinting = method == "hinting"

# Add a memory balloon.
test_microvm.api.balloon.put(
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0
amount_mib=0,
deflate_on_oom=True,
stats_polling_interval_s=0,
free_page_reporting=free_page_reporting,
free_page_hinting=free_page_hinting,
)

# Start the microvm.
test_microvm.start()
firecracker_pid = test_microvm.firecracker_pid

# Check memory usage.
get_stable_rss_mem_by_pid(firecracker_pid)

test_microvm.ssh.check_output(
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
)

time.sleep(1)

first_reading = get_stable_rss_mem_by_pid(firecracker_pid)

_, pid, _ = test_microvm.ssh.check_output("pidof fast_page_fault_helper")
# Kill the application which will free the held memory
test_microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")

# Sleep to allow guest to clean up
time.sleep(1)
# Have the guest drop its caches.
test_microvm.ssh.run("sync; echo 3 > /proc/sys/vm/drop_caches")
time.sleep(5)
time.sleep(2)

# We take the initial reading of the RSS, then calculate the amount
# we need to inflate the balloon with by subtracting it from the
# VM size and adding an offset of 10 MiB in order to make sure we
# get a lower reading than the initial one.
inflate_size = 256 - int(first_reading / 1024) + 10

# Now inflate the balloon.
test_microvm.api.balloon.patch(amount_mib=inflate_size)
if traditional_balloon:
# Now inflate the balloon
test_microvm.api.balloon.patch(amount_mib=inflate_size)
elif free_page_hinting:
test_microvm.api.balloon_hinting_start.patch()

_ = get_stable_rss_mem_by_pid(firecracker_pid)

if traditional_balloon:
# Deflate the balloon completely.
test_microvm.api.balloon.patch(amount_mib=0)

# Check memory usage again.
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
Expand Down Expand Up @@ -534,7 +566,92 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
assert stats_after_snap["available_memory"] > latest_stats["available_memory"]


def test_memory_scrub(uvm_plain_any):
@pytest.mark.parametrize("method", ["reporting", "hinting"])
def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
"""
Test that the balloon hinting and reporting works after pause/resume.
"""
vm = uvm_plain_any
vm.spawn()
vm.basic_config(
vcpu_count=2,
mem_size_mib=256,
)
vm.add_net_iface()

free_page_reporting = method == "reporting"
free_page_hinting = method == "hinting"

# Add a memory balloon with stats enabled.
vm.api.balloon.put(
amount_mib=0,
deflate_on_oom=True,
stats_polling_interval_s=STATS_POLLING_INTERVAL_S,
free_page_reporting=free_page_reporting,
free_page_hinting=free_page_hinting,
)

vm.start()

vm.ssh.check_output(
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
)

time.sleep(1)

# Get the firecracker pid, and open an ssh connection.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the bit about the ssh connection relevant here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll drop that, good catch.

firecracker_pid = vm.firecracker_pid

# Check memory usage.
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)

_, pid, _ = vm.ssh.check_output("pidof fast_page_fault_helper")
# Kill the application which will free the held memory
vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
time.sleep(2)

if free_page_hinting:
vm.api.balloon_hinting_start.patch()

# Check memory usage again.
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)

# There should be a reduction in RSS, but it's inconsistent.
# We only test that the reduction happens.
assert first_reading > second_reading

snapshot = vm.snapshot_full()
microvm = microvm_factory.build_from_snapshot(snapshot)

firecracker_pid = microvm.firecracker_pid

microvm.ssh.check_output(
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
)

time.sleep(1)

# Check memory usage.
third_reading = get_stable_rss_mem_by_pid(firecracker_pid)

_, pid, _ = microvm.ssh.check_output("pidof fast_page_fault_helper")
# Kill the application which will free the held memory
microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
time.sleep(2)

if free_page_hinting:
microvm.api.balloon_hinting_start.patch()

# Check memory usage again.
fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid)

# There should be a reduction in RSS, but it's inconsistent.
# We only test that the reduction happens.
assert third_reading > fourth_reading


@pytest.mark.parametrize("method", ["none", "hinting", "reporting"])
def test_memory_scrub(uvm_plain_any, method):
"""
Test that the memory is zeroed after deflate.
"""
Expand All @@ -543,29 +660,42 @@ def test_memory_scrub(uvm_plain_any):
microvm.basic_config(vcpu_count=2, mem_size_mib=256)
microvm.add_net_iface()

free_page_reporting = method == "reporting"
free_page_hinting = method == "hinting"

# Add a memory balloon with stats enabled.
microvm.api.balloon.put(
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
amount_mib=0,
deflate_on_oom=True,
stats_polling_interval_s=1,
free_page_reporting=free_page_reporting,
free_page_hinting=free_page_hinting,
)

microvm.start()

# Dirty 60MB of pages.
make_guest_dirty_memory(microvm.ssh, amount_mib=60)

# Now inflate the balloon with 60MB of pages.
microvm.api.balloon.patch(amount_mib=60)
if method == "none":
# Now inflate the balloon with 60MB of pages.
microvm.api.balloon.patch(amount_mib=60)
elif method == "hinting":
time.sleep(1)
microvm.api.balloon_hinting_start.patch()
elif method == "reporting":
time.sleep(2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason why reporting requires a longer delay than hinting?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reporting is expected to start in ~2 seconds, while hinting takes ~200 ms in my testing; that's why I picked these values. I can add a comment, as they do look like magic numbers.


# Get the firecracker pid, and open an ssh connection.
firecracker_pid = microvm.firecracker_pid

# Wait for the inflate to complete.
_ = get_stable_rss_mem_by_pid(firecracker_pid)

# Deflate the balloon completely.
microvm.api.balloon.patch(amount_mib=0)

# Wait for the deflate to complete.
_ = get_stable_rss_mem_by_pid(firecracker_pid)
if method == "none":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this not a "traditional" device?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, thanks.

# Deflate the balloon completely.
microvm.api.balloon.patch(amount_mib=0)
# Wait for the deflate to complete.
_ = get_stable_rss_mem_by_pid(firecracker_pid)

microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1))
Loading