File tree: 1 file changed, +6 -0 lines changed
tensorrt_llm/_torch/pyexecutor: 1 file changed, +6 -0 lines changed
Columns: original file line number | diff line number | diff line change
@@ -151,7 +151,13 @@ def _get_token_num_for_estimation(self) -> int:
151
151
# estimate_max_kv_cache_tokens submits self._dummy_reqs
152
152
num_cache_blocks = 0
153
153
num_extra_tokens_per_seq = 1 # account for generated tokens
154
+ pytorch_backend_config = executor_config .pytorch_backend_config
154
155
spec_cfg = executor_config .speculative_config
156
+ if not pytorch_backend_config .disable_overlap_scheduler :
157
+ num_extra_tokens_per_seq = num_extra_tokens_per_seq + 1
158
+ if spec_cfg is not None :
159
+ num_extra_tokens_per_seq += spec_cfg .max_draft_tokens
160
+
155
161
if spec_cfg is not None :
156
162
num_extra_tokens_per_seq += spec_cfg .max_draft_tokens
157
163
num_extra_tokens_per_seq += spec_cfg .num_extra_kv_tokens
You can’t perform that action at this time.
0 commit comments