@@ -136,7 +136,6 @@ def get_params_test_blocks_borders_aligned_prompts():
136136 prompts_lengths = [49 , 41 , 47 ]
137137 steps_add_reqs = [0 , 0 , 0 ] # add all requests in the beginning
138138 max_model_len = 2048
139- remove_left_padding = False
140139
141140 checked_steps = [
142141 {
@@ -203,7 +202,7 @@ def get_params_test_blocks_borders_aligned_prompts():
203202 },
204203 {
205204 "step" : 70 , # Decode sequence 2
206- "tkv" : 131 ,
205+ "tkv" : 67 , # tkv drops by 64 (one block size) because the padded block is removed
207206 "waiting" : [],
208207 "running" : ["2" ],
209208 "request_outputs" : ["2" ]
@@ -212,7 +211,7 @@ def get_params_test_blocks_borders_aligned_prompts():
212211 # Sequence 2 finishes at step 73
213212 # (start step + 1 prefill + 6 decodes - 1) = 67 + 1 + 6 - 1 = 73
214213 "step" : 73 ,
215- "tkv" : 134 ,
214+ "tkv" : 70 ,
216215 "waiting" : [],
217216 "running" : [],
218217 "request_outputs" : ["2" ],
@@ -229,7 +228,7 @@ def get_params_test_blocks_borders_aligned_prompts():
229228 ]
230229
231230 return (seqs_max_tokens , prompts_lengths , steps_add_reqs , checked_steps ,
232- max_model_len , remove_left_padding )
231+ max_model_len )
233232
234233
235234def get_params_test_blocks_borders_misaligned_prompts ():
@@ -241,7 +240,6 @@ def get_params_test_blocks_borders_misaligned_prompts():
241240 prompts_lengths = [49 , 41 , 47 ]
242241 steps_add_reqs = [0 , 0 , 0 ] # add all requests in the beginning
243242 max_model_len = 2048
244- remove_left_padding = False
245243
246244 checked_steps = [
247245 {
@@ -334,7 +332,7 @@ def get_params_test_blocks_borders_misaligned_prompts():
334332 ]
335333
336334 return (seqs_max_tokens , prompts_lengths , steps_add_reqs , checked_steps ,
337- max_model_len , remove_left_padding )
335+ max_model_len )
338336
339337
340338def get_params_test_special_finish ():
@@ -345,7 +343,6 @@ def get_params_test_special_finish():
345343 prompts_lengths = [49 , 30 , 20 ]
346344 steps_add_reqs = [0 , 0 , 31 ]
347345 max_model_len = 2048
348- remove_left_padding = False
349346
350347 checked_steps = [
351348 {
@@ -426,7 +423,7 @@ def get_params_test_special_finish():
426423 ]
427424
428425 return (seqs_max_tokens , prompts_lengths , steps_add_reqs , checked_steps ,
429- max_model_len , remove_left_padding )
426+ max_model_len )
430427
431428
432429def get_params_test_scheduler_constraints_tkv ():
@@ -436,7 +433,6 @@ def get_params_test_scheduler_constraints_tkv():
436433 prompts_lengths = [49 , 70 ]
437434 steps_add_reqs = [0 , 0 ]
438435 max_model_len = 2048
439- remove_left_padding = False
440436
441437 checked_steps = [
442438 {
@@ -518,7 +514,7 @@ def get_params_test_scheduler_constraints_tkv():
518514 ]
519515
520516 return (seqs_max_tokens , prompts_lengths , steps_add_reqs , checked_steps ,
521- max_model_len , remove_left_padding )
517+ max_model_len )
522518
523519
524520def get_params_test_scheduler_constraints_max_prompt_len ():
@@ -528,7 +524,6 @@ def get_params_test_scheduler_constraints_max_prompt_len():
528524 prompts_lengths = [70 , 49 , 41 ]
529525 steps_add_reqs = [0 , 0 , 0 ]
530526 max_model_len = 256
531- remove_left_padding = False
532527
533528 checked_steps = [
534529 {
@@ -628,124 +623,7 @@ def get_params_test_scheduler_constraints_max_prompt_len():
628623 ]
629624
630625 return (seqs_max_tokens , prompts_lengths , steps_add_reqs , checked_steps ,
631- max_model_len , remove_left_padding )
632-
633-
634- def get_params_test_remove_left_padding ():
635- """" Test the stripping of repeated left padding in continuous batching """
636-
637- seqs_max_tokens = [40 , 20 , 11 ]
638- prompts_lengths = [20 , 14 , 5 ]
639- steps_add_reqs = [0 , 30 , 31 ]
640- max_model_len = 2048
641- remove_left_padding = True
642-
643- checked_steps = [
644- {
645- "step" : 0 ,
646- "tkv" : 0 ,
647- "waiting" : ["0" ],
648- "running" : [],
649- "request_outputs" : []
650- },
651- {
652- # Prefill sequence 0
653- "step" : 1 ,
654- "tkv" : 64 ,
655- "waiting" : [],
656- "running" : ["0" ],
657- "request_outputs" : ["0" ]
658- },
659- {
660- # Decode sequence 0
661- "step" : 2 ,
662- "tkv" : 65 ,
663- "waiting" : [],
664- "running" : ["0" ],
665- "request_outputs" : ["0" ]
666- },
667- {
668- # Decode sequence 0, sequence 1 enters
669- "step" : 30 ,
670- "tkv" : 93 ,
671- "waiting" : ["1" ],
672- "running" : ["0" ],
673- "request_outputs" : ["0" ]
674- },
675- {
676- # Prefill sequence 1, sequence 2 enters
677- "step" : 31 ,
678- "tkv" : 93 ,
679- "waiting" : ["2" ],
680- "running" : ["1" , "0" ],
681- "request_outputs" : ["1" ]
682- },
683- {
684- # Decode sequences 0 and 1
685- "step" : 32 ,
686- "tkv" : 94 ,
687- "waiting" : ["2" ],
688- "running" : ["1" , "0" ],
689- "request_outputs" : ["1" , "0" ]
690- },
691- {
692- # Sequence 0 finishes at step 41
693- # (start step + 2 prefills + 39 decodes - 1) = 1 + 2 + 39 - 1 = 41
694- "step" : 41 ,
695- "tkv" : 103 ,
696- "waiting" : ["2" ],
697- "running" : ["1" ],
698- "request_outputs" : ["1" , "0" ],
699- "finished_requests" : ["0" ]
700- },
701- {
702- # Prefill sequence 2
703- "step" : 42 ,
704- "tkv" : 39 , # left padding reduction: 103 - 64 (block size)
705- "waiting" : [],
706- "running" : ["2" , "1" ],
707- "request_outputs" : ["2" ]
708- },
709- {
710- # Decode sequences 1 and 2
711- "step" : 43 ,
712- "tkv" : 40 ,
713- "waiting" : [],
714- "running" : ["2" , "1" ],
715- "request_outputs" : ["2" , "1" ]
716- },
717- {
718- # Sequences 1 finishes at step 51
719- # (start step + 2 prefill + 19 decodes - 1) = 31 + 2 + 19 - 1 = 51
720- "step" : 51 ,
721- "tkv" : 48 ,
722- "waiting" : [],
723- "running" : ["2" ],
724- "request_outputs" : ["2" , "1" ],
725- "finished_requests" : ["1" ]
726- },
727- {
728- # Sequences 2 finishes at step 52
729- # (start step + 1 prefill + 10 decodes - 1) = 42 + 1 + 10 - 1 = 52
730- "step" : 52 ,
731- "tkv" : 49 ,
732- "waiting" : [],
733- "running" : [],
734- "request_outputs" : ["2" ],
735- "finished_requests" : ["2" ]
736- },
737- {
738- # Tkv should be cleared one step later
739- "step" : 53 ,
740- "tkv" : 0 ,
741- "waiting" : [],
742- "running" : [],
743- "request_outputs" : [],
744- },
745- ]
746-
747- return (seqs_max_tokens , prompts_lengths , steps_add_reqs , checked_steps ,
748- max_model_len , remove_left_padding )
626+ max_model_len )
749627
750628
751629def augment_checked_steps (
@@ -775,13 +653,12 @@ def augment_checked_steps(
775653@pytest .mark .parametrize ("max_num_seqs" , [2 ])
776654@pytest .mark .parametrize (
777655 "seqs_max_tokens,prompts_lengths,steps_add_reqs,checked_steps,"
778- "max_model_len,remove_left_padding " , [
656+ "max_model_len" , [
779657 get_params_test_blocks_borders_aligned_prompts (),
780658 get_params_test_blocks_borders_misaligned_prompts (),
781659 get_params_test_special_finish (),
782660 get_params_test_scheduler_constraints_tkv (),
783661 get_params_test_scheduler_constraints_max_prompt_len (),
784- get_params_test_remove_left_padding (),
785662 ])
786663def test_scheduler_cb_steps_tkv (
787664 model : str ,
@@ -793,7 +670,6 @@ def test_scheduler_cb_steps_tkv(
793670 steps_add_reqs : list [int ],
794671 checked_steps : list [dict [str , Any ]],
795672 max_model_len : int ,
796- remove_left_padding : bool ,
797673):
798674 """
799675 Test the scheduler execution by comparing the scheduler attributes at each
@@ -809,8 +685,6 @@ def test_scheduler_cb_steps_tkv(
809685 monkeypatch .setenv ("VLLM_SPYRE_USE_CB" , "1" )
810686 monkeypatch .setenv ("VLLM_USE_V1" , "1" )
811687 monkeypatch .setenv ("VLLM_SPYRE_DYNAMO_BACKEND" , backend )
812- monkeypatch .setenv ("VLLM_SPYRE_RM_PADDED_BLOCKS" ,
813- "1" if remove_left_padding else "0" )
814688
815689 # To get deterministic execution in V1
816690 # and to enable InprocClient
0 commit comments