Skip to content

Commit f5d34d2

Browse files
authored Nov 18, 2024
Merge branch 'main' into Mixtral8x7b_MoE_Config
2 parents e49b6c4 + efb0432 commit f5d34d2

File tree

3 files changed

+22
-22
lines changed

3 files changed

+22
-22
lines changed
 

‎benchmarks/P3L.py

-1
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@ def vllm_init(args):
8383
sampling_params = SamplingParams(n=1,
8484
temperature=0.0,
8585
top_p=1,
86-
use_beam_search=False,
8786
ignore_eos=True,
8887
ppl_measurement=True,
8988
future_context=[],

‎vllm/attention/backends/rocm_flash_attn.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -619,7 +619,8 @@ def forward(
619619
# QKV for prefill.
620620
query = query[:num_prefill_tokens]
621621

622-
if key is not None and value is not None:
622+
if key is not None and value is not None \
623+
and attn_type != AttentionType.ENCODER_DECODER:
623624
key = key[:num_prefill_tokens]
624625
value = value[:num_prefill_tokens]
625626

‎vllm/attention/ops/triton_flash_attention.py

+20 -20
Original file line number | Diff line number | Diff line change
@@ -314,26 +314,26 @@ def attn_fwd(
314314
sm_scale,
315315
L,
316316
Out,
317-
stride_qz,
318-
stride_qh,
319-
stride_qm,
320-
stride_qk,
321-
stride_kz,
322-
stride_kh,
323-
stride_kn,
324-
stride_kk,
325-
stride_vz,
326-
stride_vh,
327-
stride_vk,
328-
stride_vn,
329-
stride_oz,
330-
stride_oh,
331-
stride_om,
332-
stride_on,
333-
stride_bz,
334-
stride_bh,
335-
stride_bm,
336-
stride_bn,
317+
stride_qz: tl.int64,
318+
stride_qh: tl.int64,
319+
stride_qm: tl.int64,
320+
stride_qk: tl.int64,
321+
stride_kz: tl.int64,
322+
stride_kh: tl.int64,
323+
stride_kn: tl.int64,
324+
stride_kk: tl.int64,
325+
stride_vz: tl.int64,
326+
stride_vh: tl.int64,
327+
stride_vk: tl.int64,
328+
stride_vn: tl.int64,
329+
stride_oz: tl.int64,
330+
stride_oh: tl.int64,
331+
stride_om: tl.int64,
332+
stride_on: tl.int64,
333+
stride_bz: tl.int64,
334+
stride_bh: tl.int64,
335+
stride_bm: tl.int64,
336+
stride_bn: tl.int64,
337337
cu_seqlens_q,
338338
cu_seqlens_k,
339339
dropout_p,

0 commit comments

Comments
 (0)
Please sign in to comment.