File tree Expand file tree Collapse file tree 1 file changed +3
-1
lines changed
tensorrt_llm/_torch/pyexecutor Expand file tree Collapse file tree 1 file changed +3
-1
lines changed Original file line number Diff line number Diff line change @@ -153,10 +153,12 @@ def _get_token_num_for_estimation(self) -> int:
153
153
num_cache_blocks = 0
154
154
num_extra_tokens_per_seq = 1 # account for generated tokens
155
155
pytorch_backend_config = executor_config .pytorch_backend_config
156
+ spec_cfg = executor_config .speculative_config
156
157
if not pytorch_backend_config .disable_overlap_scheduler :
157
158
num_extra_tokens_per_seq = num_extra_tokens_per_seq + 1
159
+ if spec_cfg is not None :
160
+ num_extra_tokens_per_seq += spec_cfg .max_draft_tokens
158
161
159
- spec_cfg = executor_config .speculative_config
160
162
if spec_cfg is not None :
161
163
num_extra_tokens_per_seq += spec_cfg .max_draft_tokens
162
164
num_extra_tokens_per_seq += spec_cfg .num_extra_kv_tokens
You can’t perform that action at this time.
0 commit comments