We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2e56818 commit 01a12b7 — Copy full SHA for 01a12b7
vllm_gaudi/v1/worker/hpu_dp_utils.py
@@ -22,6 +22,11 @@ def make(
22
dp_size = vllm_config.parallel_config.data_parallel_size
23
tp_size = vllm_config.parallel_config.tensor_parallel_size
24
25
+ if num_tokens % tp_size != 0:
26
+ # round num_tokens up to the next multiple of tp_size, since
27
+ # sequence-parallel MoE requires num_tokens divisible by tp_size
28
+ num_tokens = (num_tokens // tp_size + 1) * tp_size
29
+
30
num_tokens_across_dp = num_tokens * dp_size
31
32
dtype = vllm_config.model_config.dtype
0 commit comments