From 311544061dd925bb736c3ddb87245b3a974c9773 Mon Sep 17 00:00:00 2001
From: activezhao
Date: Thu, 4 Jul 2024 15:28:29 +0800
Subject: [PATCH] fix inference quality caused by temperature parameter in bls

---
 .../tensorrt_llm_bls/1/lib/triton_decoder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/triton_decoder.py b/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/triton_decoder.py
index 456ded5a..bf0f84b9 100644
--- a/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/triton_decoder.py
+++ b/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/triton_decoder.py
@@ -339,6 +339,7 @@ def _get_llm_tensors_from_request(
             "stream": "streaming",
             "prompt_embedding_table": "prompt_embedding_table",
             "prompt_vocab_size": "prompt_vocab_size",
+            "temperature": "temperature",
         }

         tensors = self.create_triton_tensors(request, name_map)
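
Note on the fix: the BLS decoder builds the tensorrt_llm model's inputs from an explicit name map, so any sampling parameter missing from that map is never forwarded and the engine falls back to its own default. Adding the "temperature" entry lets the client-supplied temperature reach the engine. The snippet below is a minimal, self-contained sketch of that mapping idea only; the helper function and variable names (map_request_tensors, request, llm_inputs) are illustrative and are not the actual create_triton_tensors implementation.

# Minimal sketch (not the backend code): how a name_map like the one patched
# above translates client-facing tensor names into engine tensor names.
name_map = {
    "stream": "streaming",
    "prompt_embedding_table": "prompt_embedding_table",
    "prompt_vocab_size": "prompt_vocab_size",
    "temperature": "temperature",  # entry added by this patch
}

def map_request_tensors(request: dict, name_map: dict) -> dict:
    """Copy only the tensors listed in name_map, renaming them for the engine."""
    return {
        engine_name: request[client_name]
        for client_name, engine_name in name_map.items()
        if client_name in request
    }

# A client request that sets a low temperature for more deterministic output.
request = {"stream": False, "temperature": 0.2}

# Without the "temperature" entry the value 0.2 is silently dropped and the
# engine samples with its default temperature; with the entry it is forwarded.
llm_inputs = map_request_tensors(request, name_map)
print(llm_inputs)  # {'streaming': False, 'temperature': 0.2}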