Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions tests/cli/test_machines_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,28 @@ def test_preflight_direct_port_minimum_scales_by_gpu_count(self):
assert direct_ports["operator"] == ">="
assert "3 directly mapped ports per listed GPU" in direct_ports["purpose"]

def test_preflight_does_not_gate_on_virtual_cpu_count(self):
    """Preflight must neither emit nor fail a CPU-core check.

    Builds an 8-GPU offer that advertises a single CPU core and checks
    that ``preflight_requirement_checks`` produces no ``cpu.cores`` entry
    and that ``failed_checks`` reports nothing for the resulting list.
    The remaining offer fields are chosen so the other checks are
    expected to pass.
    """
    from vastai.cli.self_test.machine_diagnostics import (
        failed_checks,
        preflight_requirement_checks,
    )

    gpu_count = 8
    per_gpu_ram = 24 * 1024

    single_core_offer = _self_test_offer(
        num_gpus=gpu_count,
        gpu_ram=per_gpu_ram,
        gpu_total_ram=gpu_count * per_gpu_ram,
        cpu_ram=256 * 1024,
        # A single core would have tripped the old two-cores-per-GPU gate.
        cpu_cores=1,
        direct_port_count=24,
        inet_down=600,
        inet_up=600,
    )

    results = preflight_requirement_checks(single_core_offer)

    assert all(entry["id"] != "cpu.cores" for entry in results)
    assert failed_checks(results) == []

def test_preflight_direct_port_overage_renders_advisory(
self, parse_argv, patch_get_client, monkeypatch, capsys
):
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/test_self_test_support_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_self_test_bundle_creation_error_preserves_original_failure(
captured = capsys.readouterr()
assert exc_info.value.code == 1
assert "WARNING: failed to create self-test diagnostic bundle: disk full" in captured.out
assert "Test failed: 8 preflight requirement check(s) failed." in captured.out
assert "Test failed: 7 preflight requirement check(s) failed." in captured.out


def test_self_test_runtime_failure_bundle_includes_instance_logs(
Expand Down
16 changes: 3 additions & 13 deletions vast.py
Original file line number Diff line number Diff line change
Expand Up @@ -9459,8 +9459,9 @@ def check_requirements(machine_id, api_key, args):
Validates whether a machine meets the specified hardware and performance requirements.

This function queries the machine's offers and checks various criteria such as CUDA
version, reliability, port count, PCIe bandwidth, internet speeds, GPU RAM, system
RAM, and CPU cores relative to the number of GPUs. If any of these requirements are
version, reliability, port count, PCIe bandwidth, internet speeds, GPU RAM, and
system RAM. Physical CPU core capacity is validated by the self-test image at
runtime. If any of these requirements are
not met, it records the reasons for the failure.

Args:
Expand Down Expand Up @@ -9562,17 +9563,6 @@ def check_requirements(machine_id, api_key, args):
debug_print(args, f"CPU RAM: {cpu_ram} MB")
debug_print(args, f"Total GPU RAM: {gpu_total_ram} MB")

# 9. CPU Cores vs. Number of GPUs
cpu_cores = int(safe_float(top_offer.get('cpu_cores')))
num_gpus = int(safe_float(top_offer.get('num_gpus')))
if cpu_cores < 2 * num_gpus:
unmet_reasons.append("Number of CPU cores is less than twice the number of GPUs.")

# Debugging Information for CPU Cores
if args.debugging:
debug_print(args, f"CPU Cores: {cpu_cores}")
debug_print(args, f"Number of GPUs: {num_gpus}")

# Return True if all requirements are met, False otherwise
if unmet_reasons:
progress_print(args, f"Machine ID {machine_id} does not meet the requirements:")
Expand Down
13 changes: 0 additions & 13 deletions vastai/cli/self_test/machine_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,13 @@ def preflight_requirement_checks(offer):
per_gpu_ram_gib = per_gpu_vram_gib(offer)
required_mbps = required_inet_mbps(gpu_total_ram)
cpu_ram = safe_float(offer.get("cpu_ram"))
cpu_cores = int(safe_float(offer.get("cpu_cores")))
num_gpus = int(safe_float(offer.get("num_gpus")))
listed_gpus = num_gpus if num_gpus > 0 else 1
direct_port_count = safe_float(offer.get("direct_port_count"))
required_min_ports = 3 * listed_gpus
recommended_max_ports = 64 * listed_gpus
uncapped_required_cpu_ram = 0.95 * gpu_total_ram
required_cpu_ram = min(uncapped_required_cpu_ram, SYSTEM_RAM_REQUIREMENT_CAP_MIB)
required_cpu_cores = 2 * num_gpus

checks = [
_check(
Expand Down Expand Up @@ -265,17 +263,6 @@ def preflight_requirement_checks(offer):
"total VRAM, up to the 2 TB cap."
),
),
_check(
"cpu.cores",
"CPU cores",
cpu_cores,
required_cpu_cores,
">=",
"cores",
cpu_cores >= required_cpu_cores,
"The tester expects at least two CPU cores per GPU for stable orchestration.",
"Expose more CPU cores to the host or reduce the GPU count for this offer.",
),
]
if direct_port_count > recommended_max_ports:
checks.append(
Expand Down
Loading