From fd36f8f44b74f424aef29ac6de6b7402da571d79 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 2 May 2025 12:29:16 +0100 Subject: [PATCH 1/9] ab: ignore some block throughput metrics on m8g block throughput metrics on m8g.metal instances for test scenarios using the async fio engine and more than 1 vcpu are volatile, so exclude them from A/B-testing. Suggested-by: Riccardo Mancini Signed-off-by: Patrick Roy Signed-off-by: LDagnachew --- tools/ab_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/ab_test.py b/tools/ab_test.py index 10857a35480..2ad3037e86d 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -51,6 +51,9 @@ {"fio_engine": "libaio", "metric": "clat_write"}, # boot time metrics {"performance_test": "test_boottime", "metric": "resume_time"}, + # block throughput on m8g + {"fio_engine": "libaio", "vcpus": 2, "instance": "m8g.metal-24xl"}, + {"fio_engine": "libaio", "vcpus": 2, "instance": "m8g.metal-48xl"}, ] From bd6e153c83d18f2e730b93fa92f1f741fa830fd0 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Sat, 3 May 2025 03:27:18 +0000 Subject: [PATCH 2/9] Script for finding Unused Metrics script finds metric files, extracts fields, and reports validity of each metric Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 155e4cbd8a8..bdb781253ec 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ test_results/* /resources/linux /resources/x86_64 /resources/aarch64 +/src/check_metrics.py From 9dcc7a5f9a5742143e5ad8839214b921edd101d5 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Mon, 5 May 2025 00:28:16 +0000 Subject: [PATCH 3/9] Removed sync_response_fails metric - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/logger/metrics.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index e793495e1f1..a4010dd0db2 100644 --- 
a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -354,8 +354,6 @@ pub struct ApiServerMetrics { pub process_startup_time_us: SharedStoreMetric, /// Measures the cpu's startup time in microseconds. pub process_startup_time_cpu_us: SharedStoreMetric, - /// Number of failures on API requests triggered by internal errors. - pub sync_response_fails: SharedIncMetric, /// Number of timeouts during communication with the VMM. pub sync_vmm_send_timeout_count: SharedIncMetric, } From 309727c48a69b78973d9a4c31c43760a5af9e3c1 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Mon, 5 May 2025 00:29:48 +0000 Subject: [PATCH 4/9] Removed sync_vmm_send_timeout_count + cleaned up - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/logger/metrics.rs | 4 ---- tests/host_tools/fcmetrics.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index a4010dd0db2..7f094fb4bad 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -354,8 +354,6 @@ pub struct ApiServerMetrics { pub process_startup_time_us: SharedStoreMetric, /// Measures the cpu's startup time in microseconds. pub process_startup_time_cpu_us: SharedStoreMetric, - /// Number of timeouts during communication with the VMM. - pub sync_vmm_send_timeout_count: SharedIncMetric, } impl ApiServerMetrics { /// Const default construction. 
@@ -363,8 +361,6 @@ impl ApiServerMetrics { Self { process_startup_time_us: SharedStoreMetric::new(), process_startup_time_cpu_us: SharedStoreMetric::new(), - sync_response_fails: SharedIncMetric::new(), - sync_vmm_send_timeout_count: SharedIncMetric::new(), } } } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 47661d5b27d..ec4b9b3d792 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -131,8 +131,6 @@ def validate_fc_metrics(metrics): "api_server": [ "process_startup_time_us", "process_startup_time_cpu_us", - "sync_response_fails", - "sync_vmm_send_timeout_count", ], "balloon": [ "activate_fails", From bcd569785bc219aead424e8da80e11ab8656aa27 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Mon, 5 May 2025 02:21:12 +0000 Subject: [PATCH 5/9] Removed deprecated_cmd_line_api_calls metric - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/logger/metrics.rs | 3 --- tests/host_tools/fcmetrics.py | 1 - 2 files changed, 4 deletions(-) diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 7f094fb4bad..b85c32d12f0 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -502,15 +502,12 @@ impl PatchRequestsMetrics { pub struct DeprecatedApiMetrics { /// Total number of calls to deprecated HTTP endpoints. pub deprecated_http_api_calls: SharedIncMetric, - /// Total number of calls to deprecated CMD line parameters. - pub deprecated_cmd_line_api_calls: SharedIncMetric, } impl DeprecatedApiMetrics { /// Const default construction. 
pub const fn new() -> Self { Self { deprecated_http_api_calls: SharedIncMetric::new(), - deprecated_cmd_line_api_calls: SharedIncMetric::new(), } } } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index ec4b9b3d792..4d4d8e53025 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -143,7 +143,6 @@ def validate_fc_metrics(metrics): "block": block_metrics, "deprecated_api": [ "deprecated_http_api_calls", - "deprecated_cmd_line_api_calls", ], "get_api_requests": [ "instance_info_count", From de9644bcdf8e35beb6ae52036da0f25f744483c5 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Mon, 5 May 2025 02:22:05 +0000 Subject: [PATCH 6/9] Removed log_fails metric - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/logger/metrics.rs | 3 --- tests/host_tools/fcmetrics.py | 1 - 2 files changed, 4 deletions(-) diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index b85c32d12f0..c989c459a19 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -521,8 +521,6 @@ pub struct LoggerSystemMetrics { pub metrics_fails: SharedIncMetric, /// Number of misses on logging human readable content. pub missed_log_count: SharedIncMetric, - /// Number of errors while trying to log human readable content. - pub log_fails: SharedIncMetric, } impl LoggerSystemMetrics { /// Const default construction. 
@@ -531,7 +529,6 @@ impl LoggerSystemMetrics { missed_metrics_count: SharedIncMetric::new(), metrics_fails: SharedIncMetric::new(), missed_log_count: SharedIncMetric::new(), - log_fails: SharedIncMetric::new(), } } } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 4d4d8e53025..a23ca8f460f 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -174,7 +174,6 @@ def validate_fc_metrics(metrics): "missed_metrics_count", "metrics_fails", "missed_log_count", - "log_fails", ], "mmds": [ "rx_accepted", From 99f50491c80d778c974bdb9ca09246ff16dbcf26 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Mon, 5 May 2025 02:24:35 +0000 Subject: [PATCH 7/9] Removed device_events metric - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/logger/metrics.rs | 3 --- tests/host_tools/fcmetrics.py | 1 - 2 files changed, 4 deletions(-) diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index c989c459a19..6b9f5a17699 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -799,8 +799,6 @@ impl VcpuMetrics { /// Metrics specific to the machine manager as a whole. #[derive(Debug, Default, Serialize)] pub struct VmmMetrics { - /// Number of device related events received for a VM. - pub device_events: SharedIncMetric, /// Metric for signaling a panic has occurred. pub panic_count: SharedStoreMetric, } @@ -808,7 +806,6 @@ impl VmmMetrics { /// Const default construction. 
pub const fn new() -> Self { Self { - device_events: SharedIncMetric::new(), panic_count: SharedStoreMetric::new(), } } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index a23ca8f460f..12f1c388d7d 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -237,7 +237,6 @@ def validate_fc_metrics(metrics): {"exit_mmio_write_agg": latency_agg_metrics_fields}, ], "vmm": [ - "device_events", "panic_count", ], "uart": [ From 02817e8fbcf924d12a73a1051c57773d4b4cbf92 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Wed, 7 May 2025 19:36:18 +0000 Subject: [PATCH 8/9] Removed rx_partial_writes - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/devices/virtio/net/metrics.rs | 4 ---- tests/host_tools/fcmetrics.py | 1 - tests/integration_tests/functional/test_pause_resume.py | 1 - 3 files changed, 6 deletions(-) diff --git a/src/vmm/src/devices/virtio/net/metrics.rs b/src/vmm/src/devices/virtio/net/metrics.rs index 60e03f224de..bfc871b8813 100644 --- a/src/vmm/src/devices/virtio/net/metrics.rs +++ b/src/vmm/src/devices/virtio/net/metrics.rs @@ -161,8 +161,6 @@ pub struct NetDeviceMetrics { pub rx_queue_event_count: SharedIncMetric, /// Number of events associated with the rate limiter installed on the receiving path. pub rx_event_rate_limiter_count: SharedIncMetric, - /// Number of RX partial writes to guest. - pub rx_partial_writes: SharedIncMetric, /// Number of RX rate limiter throttling events. pub rx_rate_limiter_throttled: SharedIncMetric, /// Number of events received on the associated tap. 
@@ -233,8 +231,6 @@ impl NetDeviceMetrics { .add(other.rx_queue_event_count.fetch_diff()); self.rx_event_rate_limiter_count .add(other.rx_event_rate_limiter_count.fetch_diff()); - self.rx_partial_writes - .add(other.rx_partial_writes.fetch_diff()); self.rx_rate_limiter_throttled .add(other.rx_rate_limiter_throttled.fetch_diff()); self.rx_tap_event_count diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 12f1c388d7d..4eaea494a90 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -104,7 +104,6 @@ def validate_fc_metrics(metrics): "event_fails", "rx_queue_event_count", "rx_event_rate_limiter_count", - "rx_partial_writes", "rx_rate_limiter_throttled", "rx_tap_event_count", "rx_bytes_count", diff --git a/tests/integration_tests/functional/test_pause_resume.py b/tests/integration_tests/functional/test_pause_resume.py index ca2bb6936b3..d27e21f3103 100644 --- a/tests/integration_tests/functional/test_pause_resume.py +++ b/tests/integration_tests/functional/test_pause_resume.py @@ -13,7 +13,6 @@ def verify_net_emulation_paused(metrics): """Verify net emulation is paused based on provided metrics.""" net_metrics = metrics["net"] assert net_metrics["rx_queue_event_count"] == 0 - assert net_metrics["rx_partial_writes"] == 0 assert net_metrics["rx_tap_event_count"] == 0 assert net_metrics["rx_bytes_count"] == 0 assert net_metrics["rx_packets_count"] == 0 From 1e1a61f47d8ee52ccb3d3ec360eeaf6cdfbffdf4 Mon Sep 17 00:00:00 2001 From: LDagnachew Date: Wed, 7 May 2025 19:38:04 +0000 Subject: [PATCH 9/9] Removed tx_partial_reads - metric is not used Signed-off-by: Milan Dhaduk Signed-off-by: LDagnachew --- src/vmm/src/devices/virtio/net/metrics.rs | 4 ---- tests/host_tools/fcmetrics.py | 1 - 2 files changed, 5 deletions(-) diff --git a/src/vmm/src/devices/virtio/net/metrics.rs b/src/vmm/src/devices/virtio/net/metrics.rs index bfc871b8813..a2d18c8412b 100644 --- a/src/vmm/src/devices/virtio/net/metrics.rs +++ 
b/src/vmm/src/devices/virtio/net/metrics.rs @@ -189,8 +189,6 @@ pub struct NetDeviceMetrics { pub tx_count: SharedIncMetric, /// Number of transmitted packets. pub tx_packets_count: SharedIncMetric, - /// Number of TX partial reads from guest. - pub tx_partial_reads: SharedIncMetric, /// Number of events associated with the transmitting queue. pub tx_queue_event_count: SharedIncMetric, /// Number of events associated with the rate limiter installed on the transmitting path. @@ -252,8 +250,6 @@ impl NetDeviceMetrics { self.tx_count.add(other.tx_count.fetch_diff()); self.tx_packets_count .add(other.tx_packets_count.fetch_diff()); - self.tx_partial_reads - .add(other.tx_partial_reads.fetch_diff()); self.tx_queue_event_count .add(other.tx_queue_event_count.fetch_diff()); self.tx_rate_limiter_event_count diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 4eaea494a90..9ca8630e751 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -117,7 +117,6 @@ def validate_fc_metrics(metrics): "tx_fails", "tx_count", "tx_packets_count", - "tx_partial_reads", "tx_queue_event_count", "tx_rate_limiter_event_count", "tx_rate_limiter_throttled",