Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix default window size for sparse_attn #39

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions csrc/flash_attn/flash_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ void set_params_fprop_sparse(Flash_fwd_params &params,
void *softmax_lse_d,
float p_dropout,
float softmax_scale,
int64_t window_size_left,
int64_t window_size_right,
const float softcap,
bool seqlenq_ngroups_swapped=false,
const bool unpadded_lse=false) {
Expand All @@ -198,8 +200,8 @@ void set_params_fprop_sparse(Flash_fwd_params &params,
softmax_lse_d,
p_dropout,
softmax_scale,
-1, // window_size_left
-1, // window_size_right
window_size_left,
window_size_right,
softcap,
seqlenq_ngroups_swapped,
unpadded_lse
Expand Down Expand Up @@ -395,6 +397,10 @@ mha_fwd_sparse(at::Tensor &q, // batch_size x seqlen_q x num_heads x head_size
// causal=true is the same as causal=false in this case
if (seqlen_q == 1 && !alibi_slopes_.has_value()) { is_causal = false; }

int64_t window_size_left = -1;
int64_t window_size_right = -1;
if (is_causal) { window_size_right = 0; }

CHECK_SHAPE(q, batch_size, seqlen_q, num_heads, head_size_og);
CHECK_SHAPE(k, batch_size, seqlen_k, num_heads_k, head_size_og);
CHECK_SHAPE(v, batch_size, seqlen_k, num_heads_k, head_size_og);
Expand Down Expand Up @@ -460,6 +466,8 @@ mha_fwd_sparse(at::Tensor &q, // batch_size x seqlen_q x num_heads x hea
softmax_lse.data_ptr(),
p_dropout,
softmax_scale,
window_size_left,
window_size_right,
softcap
);

Expand Down Expand Up @@ -572,6 +580,10 @@ mha_varlen_fwd_sparse(at::Tensor &q, // total_q x num_heads x head_size, total_q := sum over batch of seqlen_q

if (max_seqlen_q == 1 && !alibi_slopes_.has_value()) { is_causal = false; } // causal=true is the same as causal=false in this case

int64_t window_size_left = -1;
int64_t window_size_right = -1;
if (is_causal) { window_size_right = 0; }

void *cu_seqlens_q_d = cu_seqlens_q.data_ptr();

const int total_q = q.sizes()[0];
Expand Down Expand Up @@ -662,6 +674,8 @@ mha_varlen_fwd_sparse(at::Tensor &q, // total_q x num_heads x head_size, total_
softmax_lse.data_ptr(),
p_dropout,
softmax_scale,
window_size_left,
window_size_right,
softcap
);
params.total_q = total_q;
Expand Down