diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 6acc0e7d..342d5610 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1011,8 +1011,18 @@ version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.14.4", + "darling_macro 0.14.4", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", ] [[package]] @@ -1029,17 +1039,42 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.11.1", + "syn 2.0.100", +] + [[package]] name = "darling_macro" version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ - "darling_core", + "darling_core 0.14.4", "quote", "syn 1.0.109", ] +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.100", +] + [[package]] name = "dasp_sample" version = "0.11.0" @@ -1087,7 +1122,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" dependencies = [ - "darling", + "darling 0.14.4", "proc-macro2", "quote", "syn 1.0.109", @@ -1131,7 +1166,7 @@ dependencies = [ 
"libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1971,7 +2006,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.61.0", + "windows-core 0.61.2", ] [[package]] @@ -2333,7 +2368,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -2709,6 +2744,7 @@ dependencies = [ "log", "moshi", "numpy", + "nvml-wrapper", "ogg", "opus", "prometheus", @@ -2722,6 +2758,7 @@ dependencies = [ "serde", "serde_json", "symphonia", + "sysinfo 0.35.2", "tokio", "toml", "tower 0.4.13", @@ -2986,6 +3023,29 @@ dependencies = [ "rustc-hash 2.1.1", ] +[[package]] +name = "nvml-wrapper" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d5c6c0ef9702176a570f06ad94f3198bc29c524c8b498f1b9346e1b1bdcbb3a" +dependencies = [ + "bitflags 2.9.0", + "libloading", + "nvml-wrapper-sys", + "static_assertions", + "thiserror 1.0.69", + "wrapcenum-derive", +] + +[[package]] +name = "nvml-wrapper-sys" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd23dbe2eb8d8335d2bce0299e0a07d6a63c089243d626ca75b770a962ff49e6" +dependencies = [ + "libloading", +] + [[package]] name = "objc" version = "0.2.7" @@ -2996,6 +3056,25 @@ dependencies = [ "objc_exception", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" +dependencies = [ + "bitflags 2.9.0", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71c1c64d6120e51cd86033f67176b1cb66780c2efe34dec55176f77befd93c0a" +dependencies = [ + "libc", + 
"objc2-core-foundation", +] + [[package]] name = "objc_exception" version = "0.1.2" @@ -4578,6 +4657,20 @@ dependencies = [ "windows 0.52.0", ] +[[package]] +name = "sysinfo" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3ffa3e4ff2b324a57f7aeb3c349656c7b127c3c189520251a648102a92496e" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.61.3", +] + [[package]] name = "system-configuration" version = "0.6.1" @@ -5343,7 +5436,7 @@ dependencies = [ "regex", "rustc_version", "rustversion", - "sysinfo", + "sysinfo 0.30.13", "time", ] @@ -5563,6 +5656,28 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -5584,15 +5699,26 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", "windows-link", - "windows-result 0.3.2", - "windows-strings 0.4.0", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link", + "windows-threading", ] [[package]] @@ -5619,9 +5745,19 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link", +] [[package]] name = "windows-registry" @@ -5629,7 +5765,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ - "windows-result 0.3.2", + "windows-result 0.3.4", "windows-strings 0.3.1", "windows-targets 0.53.2", ] @@ -5645,9 +5781,9 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ "windows-link", ] @@ -5663,9 +5799,9 @@ dependencies = [ [[package]] name = "windows-strings" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ "windows-link", ] @@ -5777,6 +5913,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] +[[package]] +name = "windows-threading" +version = "0.1.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -5975,6 +6120,18 @@ dependencies = [ "bitflags 2.9.0", ] +[[package]] +name = "wrapcenum-derive" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index a828ec94..f5a9961d 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -44,6 +44,7 @@ log = "0.4.20" moshi = { path = "./moshi-core", version = "0.6.3" } native-tls = "0.2.11" numpy = "0.23.0" +nvml-wrapper = "0.11.0" ogg = { version = "0.9.1", features = ["async"] } opus = "0.3.0" prometheus = "0.13.4" @@ -63,6 +64,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.115" sha3 = "0.10.8" symphonia = { version = "0.5.3", features = ["all"] } +sysinfo = "0.35.2" tokenizers = "0.15.2" tokio = { version = "1.35.1", features = ["full"] } tokio-rustls = "0.24.1" diff --git a/rust/moshi-server/Cargo.toml b/rust/moshi-server/Cargo.toml index e02403a0..103c3704 100644 --- a/rust/moshi-server/Cargo.toml +++ b/rust/moshi-server/Cargo.toml @@ -25,6 +25,7 @@ lazy_static = { workspace = true } log = { workspace = true } moshi = { workspace = true } numpy = { workspace = true } +nvml-wrapper = { workspace = true, optional = true } ogg = { workspace = true } opus = { workspace = true } prometheus = { workspace = true } @@ -38,6 +39,7 @@ sentencepiece = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } symphonia = { workspace = true } +sysinfo = { workspace = true } tokio = { workspace = true } toml = { workspace = true } tower = { 
workspace = true }
@@ -52,5 +54,5 @@ vergen = { workspace = true }
 
 [features]
 default = []
-cuda = ["moshi/cuda", "candle/cuda", "candle-nn/cuda", "candle-transformers/cuda"]
+cuda = ["moshi/cuda", "candle/cuda", "candle-nn/cuda", "candle-transformers/cuda", "dep:nvml-wrapper"]
 metal = ["moshi/metal", "candle/metal", "candle-nn/metal", "candle-transformers/metal"]
diff --git a/rust/moshi-server/src/main.rs b/rust/moshi-server/src/main.rs
index d558d52c..b0a75eeb 100644
--- a/rust/moshi-server/src/main.rs
+++ b/rust/moshi-server/src/main.rs
@@ -2,6 +2,7 @@
 // This source code is licensed under the license found in the
 // LICENSE file in the root directory of this source tree.
 
+use crate::metrics::system::update_system_metrics;
 use anyhow::Result;
 use axum::{http::StatusCode, response::IntoResponse, response::Response};
 use candle::Device;
@@ -521,6 +522,14 @@ async fn main_() -> Result<()> {
         app = app.merge(module.router(&shared_state)?)
     }
 
+    // Starts updating system metrics
+    tokio::task::spawn(async {
+        loop {
+            update_system_metrics().await;
+            tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+        }
+    });
+
     let sock_addr = std::net::SocketAddr::from((
         std::net::IpAddr::from_str(args.addr.as_str())
             .unwrap_or(std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST)),
diff --git a/rust/moshi-server/src/metrics.rs b/rust/moshi-server/src/metrics.rs
index a6871940..9b901c7b 100644
--- a/rust/moshi-server/src/metrics.rs
+++ b/rust/moshi-server/src/metrics.rs
@@ -3,10 +3,174 @@
 // LICENSE file in the root directory of this source tree.
 
 use lazy_static::lazy_static;
+#[cfg(feature = "cuda")]
+use nvml_wrapper::{enum_wrappers::device::TemperatureSensor, error::NvmlError, Nvml};
 use prometheus::{
     histogram_opts, labels, opts, register_counter, register_gauge, register_histogram,
 };
 use prometheus::{Counter, Gauge, Histogram};
+use sysinfo::System;
+
+/// Host-level gauges: CPU and memory usage, plus GPU statistics when the `cuda`
+/// feature is enabled.
+///
+/// Every `*_utilization` gauge is exported as a ratio in `[0.0, 1.0]`.
+pub mod system {
+    use super::*;
+    #[cfg(feature = "cuda")]
+    lazy_static! {
+        pub static ref GPU_UTILIZATION: Gauge = register_gauge!(opts!(
+            "system_gpu_utilization",
+            "Utilization of the GPU. (percentage)",
+            labels! {"handler" => "all",}
+        ))
+        .unwrap();
+        pub static ref GPU_MEMORY_UTILIZATION: Gauge = register_gauge!(opts!(
+            "system_gpu_memory_utilization",
+            "Utilization of the GPU's memory. (percentage)",
+            labels! {"handler" => "all",}
+        ))
+        .unwrap();
+        pub static ref GPU_TEMPERATURE: Gauge = register_gauge!(opts!(
+            "system_gpu_temperature",
+            "Temperature of the GPU.",
+            labels! {"handler" => "all",}
+        ))
+        .unwrap();
+        /// NVML handle, initialized once and reused by every collection pass.
+        /// `None` when NVML could not be initialized; GPU gauges are then never updated.
+        static ref NVML: Option<Nvml> = match Nvml::init() {
+            Ok(nvml) => Some(nvml),
+            Err(err) => {
+                tracing::warn!(?err, "couldn't initialize nvml, gpu metrics are disabled");
+                None
+            }
+        };
+    }
+
+    lazy_static! {
+        pub static ref CPU_UTILIZATION: Gauge = register_gauge!(opts!(
+            "system_cpu_utilization",
+            "Utilization of the CPU. (percentage)",
+            labels! {"handler" => "all",}
+        ))
+        .unwrap();
+        pub static ref MEMORY_UTILIZATION: Gauge = register_gauge!(opts!(
+            "system_memory_utilization",
+            "Utilization of the system memory. (percentage)",
+            labels! {"handler" => "all",}
+        ))
+        .unwrap();
+        /// Long-lived sysinfo handle: CPU usage is computed from the difference between two
+        /// refreshes, so a freshly created `System` cannot report a meaningful value.
+        static ref SYSTEM: std::sync::Mutex<System> = std::sync::Mutex::new(System::new());
+    }
+
+    /// GPU statistics, either for one device or aggregated over all of them.
+    #[cfg(feature = "cuda")]
+    struct GpuMetrics {
+        /// Utilization in percent, as reported by NVML.
+        utilization: f64,
+        /// Total device memory (summed over devices when aggregated).
+        total_memory: f64,
+        /// Used device memory (summed over devices when aggregated).
+        used_memory: f64,
+        /// Temperature, or NaN when no sensor reading is available.
+        temperature: f64,
+    }
+
+    /// Reads the statistics of the device at `device_idx`.
+    ///
+    /// A failed temperature reading is reported as NaN; any other NVML failure is returned.
+    #[cfg(feature = "cuda")]
+    fn get_gpu_metrics(nvml: &Nvml, device_idx: u32) -> Result<GpuMetrics, NvmlError> {
+        let dev = nvml.device_by_index(device_idx)?;
+        let memory_info = dev.memory_info()?;
+
+        Ok(GpuMetrics {
+            utilization: dev.utilization_rates()?.gpu as f64,
+            total_memory: memory_info.total as f64,
+            used_memory: memory_info.used as f64,
+            temperature: dev.temperature(TemperatureSensor::Gpu).map_or(f64::NAN, |x| x as f64),
+        })
+    }
+
+    /// Aggregates the statistics of every readable GPU: mean utilization, summed memory and
+    /// maximum temperature. Returns `Ok(None)` when NVML is unavailable or no GPU is readable.
+    #[cfg(feature = "cuda")]
+    fn get_gpus_metrics() -> Result<Option<GpuMetrics>, NvmlError> {
+        let Some(nvml) = NVML.as_ref() else {
+            return Ok(None);
+        };
+
+        let mut global_metrics = GpuMetrics {
+            utilization: 0.0,
+            total_memory: 0.0,
+            used_memory: 0.0,
+            temperature: f64::NAN,
+        };
+        let mut gpu_count: usize = 0;
+
+        for device_idx in 0..nvml.device_count()? {
+            match get_gpu_metrics(nvml, device_idx) {
+                Ok(metrics) => {
+                    global_metrics.utilization += metrics.utilization;
+                    global_metrics.total_memory += metrics.total_memory;
+                    global_metrics.used_memory += metrics.used_memory;
+                    // `f64::max` ignores a NaN operand, so unreadable sensors are skipped.
+                    global_metrics.temperature = global_metrics.temperature.max(metrics.temperature);
+
+                    gpu_count += 1;
+                }
+                Err(err) => {
+                    tracing::debug!(?err, "couldn't get statistics on one gpu");
+                }
+            }
+        }
+
+        if gpu_count == 0 {
+            tracing::debug!("no gpu was found while collecting metrics");
+
+            Ok(None)
+        } else {
+            global_metrics.utilization /= gpu_count as f64;
+            Ok(Some(global_metrics))
+        }
+    }
+
+    /// Refreshes every system gauge once.
+    ///
+    /// The queries are synchronous; the function is `async` only so callers can `.await` it.
+    pub(crate) async fn update_system_metrics() {
+        #[cfg(feature = "cuda")]
+        {
+            match get_gpus_metrics() {
+                Ok(Some(metrics)) => {
+                    // NVML reports a percentage; scale it like the other utilization gauges.
+                    GPU_UTILIZATION.set(metrics.utilization / 100.0);
+                    if metrics.total_memory > 0.0 {
+                        GPU_MEMORY_UTILIZATION.set(metrics.used_memory / metrics.total_memory);
+                    }
+                    GPU_TEMPERATURE.set(metrics.temperature);
+                }
+                Ok(None) => (),
+                Err(err) => {
+                    tracing::debug!(?err, "error while collecting gpu statistics");
+                }
+            }
+        }
+
+        // No `.await` happens while the guard is held, so a blocking mutex is fine here.
+        let mut sys = SYSTEM.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
+        sys.refresh_cpu_usage();
+        sys.refresh_memory();
+        CPU_UTILIZATION.set((sys.global_cpu_usage() as f64) / 100.0);
+        let total_memory = sys.total_memory();
+        if total_memory > 0 {
+            MEMORY_UTILIZATION.set((sys.used_memory() as f64) / (total_memory as f64));
+        }
+    }
+}
 
 pub mod asr {
     use super::*;