58 changes: 57 additions & 1 deletion tests/utils/test_green_ctx.py
@@ -2,6 +2,26 @@
import torch

import flashinfer.green_ctx as green_ctx
from flashinfer.utils import get_compute_capability, get_device_sm_count, round_up


def calculate_required_sms(num_groups: int, min_count: int, device: str) -> int:
"""Calculate total SM count required for the test."""
dev = torch.device(device)
min_sm, alignment = green_ctx.get_sm_count_constraint(*get_compute_capability(dev))
rounded_min = round_up(max(min_count, min_sm), alignment)
return num_groups * rounded_min


def calculate_required_sms_by_counts(sm_counts: list, device: str) -> int:
"""Calculate total SM count required for the test with specific SM counts."""
dev = torch.device(device)
min_sm, alignment = green_ctx.get_sm_count_constraint(*get_compute_capability(dev))
total = 0
for sm_count in sm_counts:
rounded = round_up(max(sm_count, min_sm), alignment)
total += rounded
return total
Contributor
medium
This for-loop can be expressed more concisely using the built-in sum() function with a generator expression. This is a common Python idiom that improves readability.

Suggested change
-    total = 0
-    for sm_count in sm_counts:
-        rounded = round_up(max(sm_count, min_sm), alignment)
-        total += rounded
-    return total
+    return sum(round_up(max(sm_count, min_sm), alignment) for sm_count in sm_counts)
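For what it's worth, a self-contained check that the two forms agree. The local round_up and the constraint values below are stand-ins for illustration, not flashinfer's actual implementation:

    def round_up(x: int, y: int) -> int:
        # Smallest multiple of y that is >= x.
        return ((x + y - 1) // y) * y

    min_sm, alignment = 2, 4  # placeholder constraint values
    sm_counts = [1, 5, 9]

    # Loop form and sum() form agree: 4 + 8 + 12 = 24.
    total = 0
    for sm_count in sm_counts:
        total += round_up(max(sm_count, min_sm), alignment)

    assert total == sum(round_up(max(c, min_sm), alignment) for c in sm_counts) == 24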



@pytest.mark.parametrize("device", ["cuda:0"])
@@ -12,8 +32,16 @@ def test_green_ctx_creation(
num_groups: int,
min_count: int,
):
dev = torch.device(device)
required_sms = calculate_required_sms(num_groups, min_count, device)
available_sms = get_device_sm_count(dev)
if required_sms > available_sms:
pytest.skip(
f"Test requires {required_sms} SMs but device only has {available_sms} SMs"
)

    streams, resources = green_ctx.split_device_green_ctx(
-        torch.device(device), num_groups, min_count
+        dev, num_groups, min_count
    )

assert len(resources) == num_groups + 1
@@ -30,6 +58,13 @@ def test_green_ctx_kernel_execution(
num_groups: int,
min_count: int,
):
required_sms = calculate_required_sms(num_groups, min_count, device)
available_sms = get_device_sm_count(torch.device(device))
if required_sms > available_sms:
pytest.skip(
f"Test requires {required_sms} SMs but device only has {available_sms} SMs"
)

streams, resources = green_ctx.split_device_green_ctx(
torch.device(device), num_groups, min_count
)
@@ -59,6 +94,13 @@ def test_split_device_green_ctx_by_sm_count_creation(
device: str,
sm_counts: list,
):
required_sms = calculate_required_sms_by_counts(sm_counts, device)
available_sms = get_device_sm_count(torch.device(device))
if required_sms > available_sms:
pytest.skip(
f"Test requires {required_sms} SMs but device only has {available_sms} SMs"
)

streams, resources = green_ctx.split_device_green_ctx_by_sm_count(
torch.device(device), sm_counts
)
@@ -85,6 +127,13 @@ def test_split_device_green_ctx_by_sm_count_kernel_execution(
device: str,
sm_counts: list,
):
required_sms = calculate_required_sms_by_counts(sm_counts, device)
available_sms = get_device_sm_count(torch.device(device))
if required_sms > available_sms:
pytest.skip(
f"Test requires {required_sms} SMs but device only has {available_sms} SMs"
)

streams, resources = green_ctx.split_device_green_ctx_by_sm_count(
torch.device(device), sm_counts
)
@@ -113,6 +162,13 @@ def test_split_device_green_ctx_by_sm_count_alignment(
device: str,
sm_counts: list,
):
required_sms = calculate_required_sms_by_counts(sm_counts, device)
available_sms = get_device_sm_count(torch.device(device))
if required_sms > available_sms:
pytest.skip(
f"Test requires {required_sms} SMs but device only has {available_sms} SMs"
)

_, resources = green_ctx.split_device_green_ctx_by_sm_count(
torch.device(device), sm_counts
)
6 changes: 5 additions & 1 deletion tests/utils/test_jit_example.py
@@ -11,7 +11,7 @@
gen_customize_single_prefill_module,
)
from flashinfer.prefill import single_prefill_with_kv_cache_with_jit_module
-from flashinfer.utils import MaskMode, is_sm90a_supported
+from flashinfer.utils import MaskMode, is_sm90a_supported, get_compute_capability


def test_single_decode_mask():
@@ -166,6 +166,10 @@ def test_flash_sigmoid():
torch.testing.assert_close(o, o_ref, rtol=2e-2, atol=2e-2)


@pytest.mark.xfail(
get_compute_capability(torch.device("cuda:0")) == (12, 1),
reason="Numerical accuracy issue on SM 121 (Spark)",
)
def test_dump_logits():
torch.manual_seed(42)
variant_decl = r"""
2 changes: 1 addition & 1 deletion tests/utils/test_sampling.py
@@ -72,7 +72,7 @@ def test_softmax(

probs_ref = torch.softmax(logits_scaled, dim=-1)

-    assert torch.allclose(probs, probs_ref, atol=1e-5)
+    assert torch.allclose(probs, probs_ref, rtol=1e-5, atol=1e-5)
Collaborator
@bkryu bkryu Oct 25, 2025
I cannot seem to repro the fix on Spark. It also seems like allclose already has a default rtol=1e-5, so this change may not effectively do anything.
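For reference, a minimal standalone check of why the added rtol=1e-5 is a no-op (the tensors here are illustrative; the defaults are stock PyTorch, rtol=1e-05 and atol=1e-08):

    import torch

    # torch.allclose checks |input - other| <= atol + rtol * |other|.
    # rtol already defaults to 1e-05, so only atol=1e-5 deviates from the defaults.
    probs = torch.tensor([0.1, 0.2, 0.7])
    probs_ref = probs + 5e-6  # small synthetic perturbation

    print(torch.allclose(probs, probs_ref, atol=1e-5))             # True
    print(torch.allclose(probs, probs_ref, rtol=1e-5, atol=1e-5))  # True, identical check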

In fact, in my local env (cu130 container), when I loosen the tolerance and inject print statements as follows:

    probs_ref = torch.softmax(logits_scaled, dim=-1)
    print(f"{torch.isnan(probs).sum().item() = }")
    print(f"{torch.isnan(probs_ref).sum().item() =}")
    assert torch.allclose(probs, probs_ref, rtol=100, atol=100)

I am seeing nans.

(py312) root@c661e6d696f6:/flashinfer# pytest tests/utils/test_sampling.py -x -s
=================================================================================================================================================== test session starts ===================================================================================================================================================
platform linux -- Python 3.12.11, pytest-8.4.2, pluggy-1.6.0
rootdir: /flashinfer
configfile: pytest.ini
collected 900 items                                                                                                                                                                                                                                                                                                       

tests/utils/test_sampling.py torch.isnan(probs).sum().item() = 0
torch.isnan(probs_ref).sum().item() =0
... (same "= 0" pair repeated for each passing test) ...
.torch.isnan(probs).sum().item() = 4873728
torch.isnan(probs_ref).sum().item() =0
F

======================================================================================================================================================== FAILURES =========================================================================================================================================================
____________________________________________________________________________________________________________________________ test_softmax[True-True-1.0-normal_distribution(std=1)-128256-989] ____________________________________________________________________________________________________________________________
...
>       assert torch.allclose(probs, probs_ref, rtol=100, atol=100)
E       AssertionError: assert False
E        +  where False = <built-in method allclose of type object at 0x16bc850>(tensor([[0.0000e+00, 7.8481e-05, 0.0000e+00,  ..., 9.0452e-06, 8.5036e-06,\n         0.0000e+00],\n        [2.4505e-05, ...05],\n        [0.0000e+00, 0.0000e+00, 7.0366e-06,  ..., 0.0000e+00, 7.1824e-06,\n         2.0367e-06]], device='cuda:0'), tensor([[0.0000e+00, 7.8481e-05, 0.0000e+00,  ..., 9.0452e-06, 8.5036e-06,\n         0.0000e+00],\n        [2.4505e-05, ...05],\n        [0.0000e+00, 0.0000e+00, 7.0366e-06,  ..., 0.0000e+00, 7.1824e-06,\n         2.0367e-06]], device='cuda:0'), rtol=100, atol=100)
E        +    where <built-in method allclose of type object at 0x16bc850> = torch.allclose

tests/utils/test_sampling.py:76: AssertionError

...



@pytest.mark.parametrize("vocab_size", [111, 32000, 128256])