Skip to content

Commit

Permalink
check OS network limits when starting validator (solana-labs#20874)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbiseda authored Oct 26, 2021
1 parent 58aa2b9 commit 6470560
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions multinode-demo/bootstrap-validator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ args+=(
--vote-account "$vote_account"
--rpc-faucet-address 127.0.0.1:9900
--no-poh-speed-test
--no-os-network-limits-test
--no-wait-for-vote-to-start-leader
)
default_arg --gossip-port 8001
Expand Down
1 change: 1 addition & 0 deletions multinode-demo/validator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ source "$here"/common.sh
args=(
--max-genesis-archive-unpacked-size 1073741824
--no-poh-speed-test
--no-os-network-limits-test
)
airdrops_enabled=1
node_sol=500 # 500 SOL: number of SOL to airdrop the node for transaction fees and vote account rent exemption (ignored if airdrops_enabled=0)
Expand Down
1 change: 1 addition & 0 deletions validator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ jemallocator = {package = "tikv-jemallocator", version = "0.4.1", features = ["u
[target."cfg(unix)".dependencies]
libc = "0.2.104"
signal-hook = "0.2.3"
sysctl = "0.4.2"

[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
92 changes: 92 additions & 0 deletions validator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use {
contact_info::ContactInfo,
},
solana_ledger::blockstore_db::BlockstoreRecoveryMode,
solana_metrics::datapoint_info,
solana_perf::recycler::enable_recycler_warming,
solana_poh::poh_service,
solana_replica_lib::accountsdb_repl_server::AccountsDbReplServiceConfig,
Expand Down Expand Up @@ -411,6 +412,87 @@ fn get_cluster_shred_version(entrypoints: &[SocketAddr]) -> Option<u16> {
None
}

/// Returns a `family/os/arch` identifier for the platform this binary was
/// compiled for, built from the `std::env::consts` compile-time constants.
fn platform_id() -> String {
    use std::env::consts::{ARCH, FAMILY, OS};

    [FAMILY, OS, ARCH].join("/")
}

/// Compares a fixed set of kernel sysctl values against recommended minimums
/// and reports the outcome through the log and the `os-config` datapoint.
///
/// Every inspected key is logged and emitted as a datapoint: at `warn` level
/// when the current value is below the recommendation, at `info` level
/// otherwise. A key that cannot be read or parsed is logged as an error,
/// recorded with the `UNKNOWN_LIMIT` placeholder and counts as a failed check.
/// This function only reports; it never aborts startup.
#[cfg(target_os = "linux")]
fn check_os_network_limits() {
    use solana_metrics::datapoint_warn;
    use sysctl::Sysctl;

    /// Placeholder recorded when a sysctl value could not be obtained.
    const UNKNOWN_LIMIT: i64 = -1;

    /// `(sysctl key, recommended minimum)` pairs, reported in this order.
    // Reference: https://medium.com/@CameronSparr/increase-os-udp-buffers-to-improve-performance-51d167bb1360
    const RECOMMENDED_LIMITS: &[(&str, i64)] = &[
        ("net.core.rmem_max", 134217728),
        ("net.core.rmem_default", 134217728),
        ("net.core.wmem_max", 134217728),
        ("net.core.wmem_default", 134217728),
        ("vm.max_map_count", 1000000),
        // Additionally collect the following limits; a minimum of 0 means
        // they are only flagged when the value could not be obtained.
        ("net.core.optmem_max", 0),
        ("net.core.netdev_max_backlog", 0),
    ];

    /// Reads the sysctl `name` and parses it as an integer, describing any
    /// read or parse failure as a message instead of panicking.
    fn sysctl_read(name: &str) -> Result<i64, String> {
        let raw = sysctl::Ctl::new(name)
            .and_then(|ctl| ctl.value_string())
            .map_err(|e| e.to_string())?;
        raw.trim()
            .parse::<i64>()
            .map_err(|e| format!("unexpected value {:?}: {}", raw, e))
    }

    let mut check_failed = false;

    info!("Testing OS network limits:");

    for &(key, recommended_val) in RECOMMENDED_LIMITS {
        let current_val = match sysctl_read(key) {
            Ok(val) => val,
            Err(e) => {
                error!("Failed to query value for {}: {}", key, e);
                check_failed = true;
                UNKNOWN_LIMIT
            }
        };

        if current_val < recommended_val {
            datapoint_warn!("os-config", (key, current_val, i64));
            warn!(
                "  {}: recommended={} current={}, too small",
                key, recommended_val, current_val
            );
            check_failed = true;
        } else {
            datapoint_info!("os-config", (key, current_val, i64));
            info!(
                "  {}: recommended={} current={}",
                key, recommended_val, current_val
            );
        }
    }
    datapoint_info!("os-config", ("platform", platform_id(), String));

    if check_failed {
        datapoint_warn!("os-config", ("network_limit_test_failed", 1, i64));
        warn!("OS network limit test failed. solana-sys-tuner may be used to configure OS network limits. Bypass check with --no-os-network-limits-test.");
    } else {
        info!("OS network limits test passed.");
    }
}

/// Non-Linux variant: no limits are inspected here; only the platform
/// identifier is emitted on the `os-config` datapoint.
#[cfg(not(target_os = "linux"))]
fn check_os_network_limits() {
    let platform = platform_id();
    datapoint_info!("os-config", ("platform", platform, String));
}

pub fn main() {
let default_dynamic_port_range =
&format!("{}-{}", VALIDATOR_PORT_RANGE.0, VALIDATOR_PORT_RANGE.1);
Expand Down Expand Up @@ -867,6 +949,12 @@ pub fn main() {
.long("no-poh-speed-test")
.help("Skip the check for PoH speed."),
)
.arg(
Arg::with_name("no_os_network_limits_test")
.hidden(true)
.long("no-os-network-limits-test")
.help("Skip checks for OS network limits.")
)
.arg(
Arg::with_name("accounts-hash-interval-slots")
.long("accounts-hash-interval-slots")
Expand Down Expand Up @@ -2345,6 +2433,10 @@ pub fn main() {
})
});

if !matches.is_present("no_os_network_limits_test") {
check_os_network_limits();
}

let mut ledger_lock = ledger_lockfile(&ledger_path);
let _ledger_write_guard = lock_ledger(&ledger_path, &mut ledger_lock);

Expand Down

0 comments on commit 6470560

Please sign in to comment.