We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent beafc39, commit e9aa8b2 (Copy full SHA for e9aa8b2)
tensorrt_llm/_torch/speculative/model_drafter.py
@@ -470,9 +470,9 @@ def _update_target_inputs_with_draft_tokens(
470
continue
471
472
# Get the index of the draft/target tokens in the device tensor
473
- draft_idx = req_idx if self.use_static_draft_loop else request.py_batch_idx
+ draft_idx = req_idx if self.use_static_draft_loop else request.py_seq_slot
474
target_idx = req_id_to_old_request[
475
- request.py_request_id].py_batch_idx
+ request.py_request_id].py_seq_slot
476
target_inputs.new_tokens[draft_position + 1:draft_position +
477
draft_length + 1, target_idx,
478
0] = draft_tensors[0:draft_length,
0 commit comments