Commit f5d34d2 authored Nov 18, 2024 · 17 / 17 · Verified
File tree: 3 files changed, +22 -22 lines changed
Columns: original file line number | diff line number | diff line change
@@ -83,7 +83,6 @@ def vllm_init(args):
83
83
sampling_params = SamplingParams (n = 1 ,
84
84
temperature = 0.0 ,
85
85
top_p = 1 ,
86
- use_beam_search = False ,
87
86
ignore_eos = True ,
88
87
ppl_measurement = True ,
89
88
future_context = [],
Columns: original file line number | diff line number | diff line change
@@ -619,7 +619,8 @@ def forward(
619
619
# QKV for prefill.
620
620
query = query [:num_prefill_tokens ]
621
621
622
- if key is not None and value is not None :
622
+ if key is not None and value is not None \
623
+ and attn_type != AttentionType .ENCODER_DECODER :
623
624
key = key [:num_prefill_tokens ]
624
625
value = value [:num_prefill_tokens ]
625
626
Columns: original file line number | diff line number | diff line change
@@ -314,26 +314,26 @@ def attn_fwd(
314
314
sm_scale ,
315
315
L ,
316
316
Out ,
317
- stride_qz ,
318
- stride_qh ,
319
- stride_qm ,
320
- stride_qk ,
321
- stride_kz ,
322
- stride_kh ,
323
- stride_kn ,
324
- stride_kk ,
325
- stride_vz ,
326
- stride_vh ,
327
- stride_vk ,
328
- stride_vn ,
329
- stride_oz ,
330
- stride_oh ,
331
- stride_om ,
332
- stride_on ,
333
- stride_bz ,
334
- stride_bh ,
335
- stride_bm ,
336
- stride_bn ,
317
+ stride_qz : tl . int64 ,
318
+ stride_qh : tl . int64 ,
319
+ stride_qm : tl . int64 ,
320
+ stride_qk : tl . int64 ,
321
+ stride_kz : tl . int64 ,
322
+ stride_kh : tl . int64 ,
323
+ stride_kn : tl . int64 ,
324
+ stride_kk : tl . int64 ,
325
+ stride_vz : tl . int64 ,
326
+ stride_vh : tl . int64 ,
327
+ stride_vk : tl . int64 ,
328
+ stride_vn : tl . int64 ,
329
+ stride_oz : tl . int64 ,
330
+ stride_oh : tl . int64 ,
331
+ stride_om : tl . int64 ,
332
+ stride_on : tl . int64 ,
333
+ stride_bz : tl . int64 ,
334
+ stride_bh : tl . int64 ,
335
+ stride_bm : tl . int64 ,
336
+ stride_bn : tl . int64 ,
337
337
cu_seqlens_q ,
338
338
cu_seqlens_k ,
339
339
dropout_p ,
You can’t perform that action at this time.
0 commit comments