[https://nvbugs/5556020][fix] Cherry-pick: fix dimension mismatch in test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3 (#8644)

sunnyqgg · web-flow · commit e9aa8b222fb7 · 2025-10-29T15:44:25.000+08:00
Signed-off-by: qgai <qgai@nvidia.com>
diff --git a/tensorrt_llm/_torch/speculative/model_drafter.py b/tensorrt_llm/_torch/speculative/model_drafter.py
@@ -470,9 +470,9 @@ def _update_target_inputs_with_draft_tokens(
                     continue
 
                 # Get the index of the draft/target tokens in the device tensor
-                draft_idx = req_idx if self.use_static_draft_loop else request.py_batch_idx
+                draft_idx = req_idx if self.use_static_draft_loop else request.py_seq_slot
                 target_idx = req_id_to_old_request[
-                    request.py_request_id].py_batch_idx
+                    request.py_request_id].py_seq_slot
                 target_inputs.new_tokens[draft_position + 1:draft_position +
                                          draft_length + 1, target_idx,
                                          0] = draft_tensors[0:draft_length,