Closed
Changes from all commits (23 commits):
d332c97  Add Nemotron MOE support for AutoDeploy (nvchenghaoz, Oct 18, 2025)
da4b7cb  Merge branch 'main' into feat/autodeploy-nemotron-moe (nvchenghaoz, Oct 18, 2025)
00c3e47  [TRTLLM-8480][chore] clean create_py_executor API (#8412) (QiJune, Oct 18, 2025)
17b045d  [None][feat] AutoDeploy: chunked prefill support (#8158) (lucaslie, Oct 18, 2025)
c146851  Address reviewers' comments (nvchenghaoz, Oct 20, 2025)
a387307  Add the cu file to the package (nvchenghaoz, Oct 20, 2025)
1b01016  Move the compiled files to a custom folder (nvchenghaoz, Oct 20, 2025)
aca433e  Update the AD cache location (nvchenghaoz, Oct 20, 2025)
f97c084  Fix test error (nvchenghaoz, Oct 20, 2025)
bd63125  Merge branch 'main' into feat/autodeploy-nemotron-moe (nvchenghaoz, Oct 20, 2025)
a74ad94  Add Nemotron MOE support for AutoDeploy (nvchenghaoz, Oct 18, 2025)
5a40059  skip the test due to permission issue (nvchenghaoz, Oct 21, 2025)
be129fa  Resolve the rebase issue (nvchenghaoz, Oct 21, 2025)
5c8e567  add nemotron-h rms norm pattern (suyoggupta, Oct 22, 2025)
56746f0  merge main (suyoggupta, Oct 22, 2025)
d50402e  update rms norm fusion unit test (suyoggupta, Oct 22, 2025)
c741482  Merge branch 'main' into sg/8k16k_fuserms (suyoggupta, Oct 22, 2025)
d3f3b99  Merge branch 'main' into sg/8k16k_fuserms (suyoggupta, Oct 23, 2025)
7ea21cb  Merge branch 'main' into sg/8k16k_fuserms (suyoggupta, Oct 23, 2025)
44ce3e4  Merge branch 'main' into sg/8k16k_fuserms (suyoggupta, Oct 23, 2025)
804736e  Merge branch 'main' into sg/8k16k_fuserms (suyoggupta, Oct 23, 2025)
f489dfe  add skip condition (suyoggupta, Oct 23, 2025)
b4f0152  merge main (suyoggupta, Oct 23, 2025)
@@ -41,6 +41,7 @@ def fused_mlp_moe_kernel(
     topk_weights_ptr,
     sorted_token_ids_ptr,
     expert_ids_ptr,
+    num_tokens_post_padded_ptr,
     # Matrix dimensions
     N,
     K,
@@ -84,6 +85,10 @@ def fused_mlp_moe_kernel(
     pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
     pid_n = (pid % num_pid_in_group) // group_size_m

+    num_tokens_post_padded = tl.load(num_tokens_post_padded_ptr)
+    if pid_m * BLOCK_SIZE_M >= num_tokens_post_padded:
+        return
+
     offs_token_id = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M).to(tl.int64)
     # Bounds check: EM might not be a multiple of BLOCK_SIZE_M
     # so offs_token_id can exceed EM-1. Load with mask to avoid out-of-bounds.
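
The early-exit guard added above is needed because the launch grid is sized from EM, the worst-case padded token count known at launch time, while the true padded length is only available on-device through num_tokens_post_padded_ptr; tiles past that point have no real work and return immediately. A minimal sketch of that relationship is below. The _grid body is reconstructed from the hunk header that follows and is an assumption, not the file's actual code:

```python
import triton

# Assumed shape of the _grid helper named in the next hunk: the 1D grid
# covers the worst-case padded length EM, so some program ids can land
# past the runtime value of num_tokens_post_padded and must exit early
# inside the kernel (the guard added in the hunk above).
def make_grid(EM: int, N: int):
    def _grid(META):
        return (
            triton.cdiv(EM, META["BLOCK_SIZE_M"])
            * triton.cdiv(N, META["BLOCK_SIZE_N"]),
        )
    return _grid
```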
@@ -270,6 +275,7 @@ def _grid(META):
         topk_weights if topk_weights is not None else C,
         sorted_token_ids,
         expert_ids,
+        num_tokens_post_padded,
         B.size(1),
         B.size(2),
         EM,
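
For context, num_tokens_post_padded comes from the host-side alignment step that also produces sorted_token_ids and expert_ids: each expert's token segment is padded up to a multiple of BLOCK_SIZE_M so every tile touches exactly one expert's weights. Below is a minimal sketch of that step, assuming dense per-expert padding; the helper name align_tokens_per_expert and the sentinel-padding layout are illustrative assumptions, not the PR's actual host code:

```python
import torch

def align_tokens_per_expert(topk_ids: torch.Tensor, num_experts: int, block_m: int):
    """Hypothetical sketch of the alignment step feeding the kernel.

    Groups flattened (token, expert) assignments by expert and pads each
    expert's segment to a multiple of block_m, so one BLOCK_SIZE_M tile
    never straddles two experts.
    """
    flat = topk_ids.flatten()
    pad_id = flat.numel()  # sentinel index marking padding slots
    sorted_ids, expert_ids, total = [], [], 0
    for e in range(num_experts):
        idx = (flat == e).nonzero(as_tuple=True)[0]
        padded = -(-idx.numel() // block_m) * block_m  # round up to block_m
        seg = torch.full((padded,), pad_id, dtype=torch.int64)
        seg[: idx.numel()] = idx
        sorted_ids.append(seg)
        expert_ids.extend([e] * (padded // block_m))  # one expert id per tile
        total += padded
    # total is num_tokens_post_padded: the tile-aligned length actually used.
    # The grid is sized for the worst case EM, so tiles beyond this value
    # hit the kernel's new early-return guard.
    return torch.cat(sorted_ids), torch.tensor(expert_ids), torch.tensor([total])
```

The sentinel value pad_id also explains the existing bounds-check comment in the kernel: padding slots hold an index one past the last real token, so loads through sorted_token_ids must be masked.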