Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 9 additions & 18 deletions tests/e2e/test_spyre_cb.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure whether `get_params_test_remove_left_padding` still has any value, since removing the left padding is now the default. cc @sducouedic

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Do you agree, @sducouedic?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, we should remove that one.

Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def get_params_test_blocks_borders_aligned_prompts():
prompts_lengths = [49, 41, 47]
steps_add_reqs = [0, 0, 0] # add all requests in the beginning
max_model_len = 2048
remove_left_padding = False

checked_steps = [
{
Expand Down Expand Up @@ -203,7 +202,7 @@ def get_params_test_blocks_borders_aligned_prompts():
},
{
"step": 70, # Decode sequence 2
"tkv": 131,
"tkv": 67,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth adding a comment explaining why the tkv decreased: left-padding reduction is now the default.

"waiting": [],
"running": ["2"],
"request_outputs": ["2"]
Expand All @@ -212,7 +211,7 @@ def get_params_test_blocks_borders_aligned_prompts():
# Sequence 2 finishes at step 73
# (start step + 1 prefill + 6 decodes - 1) = 67 + 1 + 6 - 1 = 73
"step": 73,
"tkv": 134,
"tkv": 70,
"waiting": [],
"running": [],
"request_outputs": ["2"],
Expand All @@ -229,7 +228,7 @@ def get_params_test_blocks_borders_aligned_prompts():
]

return (seqs_max_tokens, prompts_lengths, steps_add_reqs, checked_steps,
max_model_len, remove_left_padding)
max_model_len)


def get_params_test_blocks_borders_misaligned_prompts():
Expand All @@ -241,7 +240,6 @@ def get_params_test_blocks_borders_misaligned_prompts():
prompts_lengths = [49, 41, 47]
steps_add_reqs = [0, 0, 0] # add all requests in the beginning
max_model_len = 2048
remove_left_padding = False

checked_steps = [
{
Expand Down Expand Up @@ -334,7 +332,7 @@ def get_params_test_blocks_borders_misaligned_prompts():
]

return (seqs_max_tokens, prompts_lengths, steps_add_reqs, checked_steps,
max_model_len, remove_left_padding)
max_model_len)


def get_params_test_special_finish():
Expand All @@ -345,7 +343,6 @@ def get_params_test_special_finish():
prompts_lengths = [49, 30, 20]
steps_add_reqs = [0, 0, 31]
max_model_len = 2048
remove_left_padding = False

checked_steps = [
{
Expand Down Expand Up @@ -426,7 +423,7 @@ def get_params_test_special_finish():
]

return (seqs_max_tokens, prompts_lengths, steps_add_reqs, checked_steps,
max_model_len, remove_left_padding)
max_model_len)


def get_params_test_scheduler_constraints_tkv():
Expand All @@ -436,7 +433,6 @@ def get_params_test_scheduler_constraints_tkv():
prompts_lengths = [49, 70]
steps_add_reqs = [0, 0]
max_model_len = 2048
remove_left_padding = False

checked_steps = [
{
Expand Down Expand Up @@ -518,7 +514,7 @@ def get_params_test_scheduler_constraints_tkv():
]

return (seqs_max_tokens, prompts_lengths, steps_add_reqs, checked_steps,
max_model_len, remove_left_padding)
max_model_len)


def get_params_test_scheduler_constraints_max_prompt_len():
Expand All @@ -528,7 +524,6 @@ def get_params_test_scheduler_constraints_max_prompt_len():
prompts_lengths = [70, 49, 41]
steps_add_reqs = [0, 0, 0]
max_model_len = 256
remove_left_padding = False

checked_steps = [
{
Expand Down Expand Up @@ -628,7 +623,7 @@ def get_params_test_scheduler_constraints_max_prompt_len():
]

return (seqs_max_tokens, prompts_lengths, steps_add_reqs, checked_steps,
max_model_len, remove_left_padding)
max_model_len)


def get_params_test_remove_left_padding():
Expand All @@ -638,7 +633,6 @@ def get_params_test_remove_left_padding():
prompts_lengths = [20, 14, 5]
steps_add_reqs = [0, 30, 31]
max_model_len = 2048
remove_left_padding = True

checked_steps = [
{
Expand Down Expand Up @@ -745,7 +739,7 @@ def get_params_test_remove_left_padding():
]

return (seqs_max_tokens, prompts_lengths, steps_add_reqs, checked_steps,
max_model_len, remove_left_padding)
max_model_len)


def augment_checked_steps(
Expand Down Expand Up @@ -775,7 +769,7 @@ def augment_checked_steps(
@pytest.mark.parametrize("max_num_seqs", [2])
@pytest.mark.parametrize(
"seqs_max_tokens,prompts_lengths,steps_add_reqs,checked_steps,"
"max_model_len,remove_left_padding", [
"max_model_len", [
get_params_test_blocks_borders_aligned_prompts(),
get_params_test_blocks_borders_misaligned_prompts(),
get_params_test_special_finish(),
Expand All @@ -793,7 +787,6 @@ def test_scheduler_cb_steps_tkv(
steps_add_reqs: list[int],
checked_steps: list[dict[str, Any]],
max_model_len: int,
remove_left_padding: bool,
):
"""
Test the scheduler execution by comparing the scheduler attributes at each
Expand All @@ -809,8 +802,6 @@ def test_scheduler_cb_steps_tkv(
monkeypatch.setenv("VLLM_SPYRE_USE_CB", "1")
monkeypatch.setenv("VLLM_USE_V1", "1")
monkeypatch.setenv("VLLM_SPYRE_DYNAMO_BACKEND", backend)
monkeypatch.setenv("VLLM_SPYRE_RM_PADDED_BLOCKS",
"1" if remove_left_padding else "0")

# To get deterministic execution in V1
# and to enable InprocClient
Expand Down
6 changes: 0 additions & 6 deletions vllm_spyre/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
VLLM_SPYRE_WARMUP_NEW_TOKENS: Optional[list[int]] = None
VLLM_SPYRE_WARMUP_BATCH_SIZES: Optional[list[int]] = None
VLLM_SPYRE_USE_CB: bool = False
VLLM_SPYRE_RM_PADDED_BLOCKS: bool = False
VLLM_SPYRE_PERF_METRIC_LOGGING_ENABLED: int = 0
VLLM_SPYRE_PERF_METRIC_LOGGING_DIR: str = "/tmp"
VLLM_SPYRE_OVERRIDE_SIGNALS_HANDLER: bool = False
Expand Down Expand Up @@ -69,11 +68,6 @@ def _backend_backwards_compat() -> str:
"VLLM_SPYRE_USE_CB":
lambda: bool(int(os.getenv("VLLM_SPYRE_USE_CB", "0"))),

# If set, remove redundant (left) padded blocks. Only applicable in
# continuous batching mode.
"VLLM_SPYRE_RM_PADDED_BLOCKS":
lambda: bool(int(os.getenv("VLLM_SPYRE_RM_PADDED_BLOCKS", "0"))),

# Enable performance metric logging. This captures startup information
# such as warmup times, and loading times. It is turned off by default.
"VLLM_SPYRE_PERF_METRIC_LOGGING_ENABLED":
Expand Down
5 changes: 1 addition & 4 deletions vllm_spyre/v1/worker/spyre_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@

from vllm.v1.outputs import ModelRunnerOutput

import vllm_spyre.envs as envs_spyre

logger = init_logger(__name__)


Expand Down Expand Up @@ -906,8 +904,7 @@ def prepare_model_input(
self, scheduler_output: SchedulerOutput) -> ModelForwardInputs:

# remove left padding if applicable before next prefill/decode step
if envs_spyre.VLLM_SPYRE_RM_PADDED_BLOCKS:
self.reduce_left_padding()
self.reduce_left_padding()

# NOTE: We assume that all sequences in the group are all prompts or
# all decodes.
Expand Down