|
14 | 14 | @pytest.mark.cb |
15 | 15 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
16 | 16 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
17 | | -def prompts_aligned_with_tkv_boundaries(model: str, backend: str, |
18 | | - monkeypatch: pytest.MonkeyPatch): |
| 17 | +def test_prompts_aligned_with_tkv_boundaries(model: str, backend: str, |
| 18 | + monkeypatch: pytest.MonkeyPatch): |
19 | 19 | """ Scenario where it happens that all the sequences get scheduled in a |
20 | 20 | fashion where they are aligned with the block boundaries (i.e. tkv multiple |
21 | 21 | of 64 at the time of prefilling). |
@@ -176,8 +176,8 @@ def prompts_aligned_with_tkv_boundaries(model: str, backend: str, |
176 | 176 | @pytest.mark.cb |
177 | 177 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
178 | 178 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
179 | | -def prompts_misaligned_with_tkv_boundaries(model: str, backend: str, |
180 | | - monkeypatch: pytest.MonkeyPatch): |
| 179 | +def test_prompts_misaligned_with_tkv_boundaries( |
| 180 | + model: str, backend: str, monkeypatch: pytest.MonkeyPatch): |
181 | 181 | """ Scenario where it happens that some sequence gets scheduled in a way |
182 | 182 | that it is misaligned with the block boundary (i.e. tkv is not a multiple |
183 | 183 | of 64 at the time of prefilling). |
@@ -338,7 +338,7 @@ def prompts_misaligned_with_tkv_boundaries(model: str, backend: str, |
338 | 338 | @pytest.mark.cb |
339 | 339 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
340 | 340 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
341 | | -def two_sequences_finish_same_time_as_new_arrive( |
| 341 | +def test_two_sequences_finish_same_time_as_new_arrive( |
342 | 342 | model: str, backend: str, monkeypatch: pytest.MonkeyPatch): |
343 | 343 | """ 2-cases-in-1: (1) Two sequences finish at the same time and (2) a new |
344 | 344 | request arrives when another finishes. |
@@ -476,8 +476,8 @@ def two_sequences_finish_same_time_as_new_arrive( |
476 | 476 | @pytest.mark.cb |
477 | 477 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
478 | 478 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
479 | | -def prompt_too_long_for_current_tkv(model: str, backend: str, |
480 | | - monkeypatch: pytest.MonkeyPatch): |
| 479 | +def test_prompt_too_long_for_current_tkv(model: str, backend: str, |
| 480 | + monkeypatch: pytest.MonkeyPatch): |
481 | 481 | """ Scenario where the requested prompt is too long for current tkv value |
482 | 482 |
|
483 | 483 | Configuration: |
@@ -623,7 +623,7 @@ def prompt_too_long_for_current_tkv(model: str, backend: str, |
623 | 623 | @pytest.mark.cb |
624 | 624 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
625 | 625 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
626 | | -def requested_tokens_not_fitting_remaining_space( |
| 626 | +def test_requested_tokens_not_fitting_remaining_space( |
627 | 627 | model: str, backend: str, monkeypatch: pytest.MonkeyPatch): |
628 | 628 | """ Scenario where the request goes beyond max_model_len |
629 | 629 |
|
@@ -808,8 +808,8 @@ def requested_tokens_not_fitting_remaining_space( |
808 | 808 | @pytest.mark.cb |
809 | 809 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
810 | 810 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
811 | | -def requests_use_all_available_blocks(model: str, backend: str, |
812 | | - monkeypatch: pytest.MonkeyPatch): |
| 811 | +def test_requests_use_all_available_blocks(model: str, backend: str, |
| 812 | + monkeypatch: pytest.MonkeyPatch): |
813 | 813 | """ Scenario where the requests use all of the available blocks |
814 | 814 | |
815 | 815 | Configuration: |
@@ -939,8 +939,8 @@ def requests_use_all_available_blocks(model: str, backend: str, |
939 | 939 | @pytest.mark.cb |
940 | 940 | @pytest.mark.parametrize("model", get_spyre_model_list()) |
941 | 941 | @pytest.mark.parametrize("backend", get_spyre_backend_list()) |
942 | | -def requests_use_more_than_available_blocks(model: str, backend: str, |
943 | | - monkeypatch: pytest.MonkeyPatch): |
| 942 | +def test_requests_use_more_than_available_blocks( |
| 943 | + model: str, backend: str, monkeypatch: pytest.MonkeyPatch): |
944 | 944 | """ Scenario where some request need to wait because of the number of |
945 | 945 | available blocks. |
946 | 946 | |
|
0 commit comments