-
Notifications
You must be signed in to change notification settings - Fork 2.1k
[RFC] virtio-balloon: Add free page reporting hinting #5491
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
5c9e4c7
1ca36f2
59e8fb6
90621d3
36eafae
fd88929
41ed063
8dcf4fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| """Tests for guest-side operations on /balloon resources.""" | ||
|
|
||
| import logging | ||
| import signal | ||
| import time | ||
| from subprocess import TimeoutExpired | ||
|
|
||
|
|
@@ -293,7 +294,8 @@ def test_reinflate_balloon(uvm_plain_any): | |
|
|
||
|
|
||
| # pylint: disable=C0103 | ||
| def test_size_reduction(uvm_plain_any): | ||
| @pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) | ||
| def test_size_reduction(uvm_plain_any, method): | ||
| """ | ||
| Verify that ballooning reduces RSS usage on a newly booted guest. | ||
| """ | ||
|
|
@@ -302,30 +304,60 @@ def test_size_reduction(uvm_plain_any): | |
| test_microvm.basic_config() | ||
| test_microvm.add_net_iface() | ||
|
|
||
| traditional_balloon = method == "traditional" | ||
| free_page_reporting = method == "reporting" | ||
| free_page_hinting = method == "hinting" | ||
|
|
||
| # Add a memory balloon. | ||
| test_microvm.api.balloon.put( | ||
| amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0 | ||
| amount_mib=0, | ||
| deflate_on_oom=True, | ||
| stats_polling_interval_s=0, | ||
| free_page_reporting=free_page_reporting, | ||
| free_page_hinting=free_page_hinting, | ||
| ) | ||
|
|
||
| # Start the microvm. | ||
| test_microvm.start() | ||
| firecracker_pid = test_microvm.firecracker_pid | ||
|
|
||
| # Check memory usage. | ||
| get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| test_microvm.ssh.check_output( | ||
| "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &" | ||
| ) | ||
|
|
||
| time.sleep(1) | ||
|
|
||
| first_reading = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| _, pid, _ = test_microvm.ssh.check_output("pidof fast_page_fault_helper") | ||
| # Kill the application which will free the held memory | ||
| test_microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}") | ||
|
|
||
| # Sleep to allow guest to clean up | ||
| time.sleep(1) | ||
| # Have the guest drop its caches. | ||
| test_microvm.ssh.run("sync; echo 3 > /proc/sys/vm/drop_caches") | ||
| time.sleep(5) | ||
| time.sleep(2) | ||
|
|
||
| # We take the initial reading of the RSS, then calculate the amount | ||
| # we need to inflate the balloon with by subtracting it from the | ||
| # VM size and adding an offset of 10 MiB in order to make sure we | ||
| # get a lower reading than the initial one. | ||
| inflate_size = 256 - int(first_reading / 1024) + 10 | ||
|
|
||
| # Now inflate the balloon. | ||
| test_microvm.api.balloon.patch(amount_mib=inflate_size) | ||
| if traditional_balloon: | ||
| # Now inflate the balloon | ||
| test_microvm.api.balloon.patch(amount_mib=inflate_size) | ||
| elif free_page_hinting: | ||
| test_microvm.api.balloon_hinting_start.patch() | ||
|
|
||
| _ = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| if traditional_balloon: | ||
| # Deflate the balloon completely. | ||
| test_microvm.api.balloon.patch(amount_mib=0) | ||
|
|
||
| # Check memory usage again. | ||
| second_reading = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
@@ -534,7 +566,92 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory): | |
| assert stats_after_snap["available_memory"] > latest_stats["available_memory"] | ||
|
|
||
|
|
||
| def test_memory_scrub(uvm_plain_any): | ||
| @pytest.mark.parametrize("method", ["reporting", "hinting"]) | ||
| def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): | ||
| """ | ||
| Test that the balloon hinting and reporting works after pause/resume. | ||
| """ | ||
| vm = uvm_plain_any | ||
| vm.spawn() | ||
| vm.basic_config( | ||
| vcpu_count=2, | ||
| mem_size_mib=256, | ||
| ) | ||
| vm.add_net_iface() | ||
|
|
||
| free_page_reporting = method == "reporting" | ||
| free_page_hinting = method == "hinting" | ||
|
|
||
| # Add a memory balloon with stats enabled. | ||
| vm.api.balloon.put( | ||
| amount_mib=0, | ||
| deflate_on_oom=True, | ||
| stats_polling_interval_s=STATS_POLLING_INTERVAL_S, | ||
| free_page_reporting=free_page_reporting, | ||
| free_page_hinting=free_page_hinting, | ||
| ) | ||
|
|
||
| vm.start() | ||
|
|
||
| vm.ssh.check_output( | ||
| "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &" | ||
| ) | ||
|
|
||
| time.sleep(1) | ||
|
|
||
| # Get the firecracker pid, and open an ssh connection. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the bit about the ssh connection relevant here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll drop that, good catch. |
||
| firecracker_pid = vm.firecracker_pid | ||
|
|
||
| # Check memory usage. | ||
| first_reading = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| _, pid, _ = vm.ssh.check_output("pidof fast_page_fault_helper") | ||
| # Kill the application which will free the held memory | ||
| vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}") | ||
| time.sleep(2) | ||
|
|
||
| if free_page_hinting: | ||
| vm.api.balloon_hinting_start.patch() | ||
|
|
||
| # Check memory usage again. | ||
| second_reading = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| # There should be a reduction in RSS, but it's inconsistent. | ||
| # We only test that the reduction happens. | ||
| assert first_reading > second_reading | ||
|
|
||
| snapshot = vm.snapshot_full() | ||
| microvm = microvm_factory.build_from_snapshot(snapshot) | ||
|
|
||
| firecracker_pid = microvm.firecracker_pid | ||
|
|
||
| microvm.ssh.check_output( | ||
| "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &" | ||
| ) | ||
|
|
||
| time.sleep(1) | ||
|
|
||
| # Check memory usage. | ||
| third_reading = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| _, pid, _ = microvm.ssh.check_output("pidof fast_page_fault_helper") | ||
| # Kill the application which will free the held memory | ||
| microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}") | ||
| time.sleep(2) | ||
|
|
||
| if free_page_hinting: | ||
| microvm.api.balloon_hinting_start.patch() | ||
|
|
||
| # Check memory usage again. | ||
| fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| # There should be a reduction in RSS, but it's inconsistent. | ||
| # We only test that the reduction happens. | ||
| assert third_reading > fourth_reading | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("method", ["none", "hinting", "reporting"]) | ||
| def test_memory_scrub(uvm_plain_any, method): | ||
| """ | ||
| Test that the memory is zeroed after deflate. | ||
| """ | ||
|
|
@@ -543,29 +660,42 @@ def test_memory_scrub(uvm_plain_any): | |
| microvm.basic_config(vcpu_count=2, mem_size_mib=256) | ||
| microvm.add_net_iface() | ||
|
|
||
| free_page_reporting = method == "reporting" | ||
| free_page_hinting = method == "hinting" | ||
|
|
||
| # Add a memory balloon with stats enabled. | ||
| microvm.api.balloon.put( | ||
| amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1 | ||
| amount_mib=0, | ||
| deflate_on_oom=True, | ||
| stats_polling_interval_s=1, | ||
| free_page_reporting=free_page_reporting, | ||
| free_page_hinting=free_page_hinting, | ||
| ) | ||
|
|
||
| microvm.start() | ||
|
|
||
| # Dirty 60MB of pages. | ||
| make_guest_dirty_memory(microvm.ssh, amount_mib=60) | ||
|
|
||
| # Now inflate the balloon with 60MB of pages. | ||
| microvm.api.balloon.patch(amount_mib=60) | ||
| if method == "none": | ||
| # Now inflate the balloon with 60MB of pages. | ||
| microvm.api.balloon.patch(amount_mib=60) | ||
| elif method == "hinting": | ||
| time.sleep(1) | ||
| microvm.api.balloon_hinting_start.patch() | ||
| elif method == "reporting": | ||
| time.sleep(2) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any reason why reporting requires a longer delay than hinting? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reporting is expected to start in ~2 seconds, and hinting takes ~200 ms in my testing, which is why I picked these values. I can add a comment, as they do look like magic numbers. |
||
|
|
||
| # Get the firecracker pid, and open an ssh connection. | ||
| firecracker_pid = microvm.firecracker_pid | ||
|
|
||
| # Wait for the inflate to complete. | ||
| _ = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| # Deflate the balloon completely. | ||
| microvm.api.balloon.patch(amount_mib=0) | ||
|
|
||
| # Wait for the deflate to complete. | ||
| _ = get_stable_rss_mem_by_pid(firecracker_pid) | ||
| if method == "none": | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this not a "traditional" device? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good catch, thanks. |
||
| # Deflate the balloon completely. | ||
| microvm.api.balloon.patch(amount_mib=0) | ||
| # Wait for the deflate to complete. | ||
| _ = get_stable_rss_mem_by_pid(firecracker_pid) | ||
|
|
||
| microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1)) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: you could extract the update to the helper into its own commit and explain the changes in the commit message
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense, will do!