From 5ac4b384b6781e939c96e23c3c1a0e3c28d8900e Mon Sep 17 00:00:00 2001
From: lujianghu
Date: Tue, 28 Jan 2025 19:15:54 +0800
Subject: [PATCH] set encode add_special_tokens=False

---
 tllm/generate/token_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tllm/generate/token_utils.py b/tllm/generate/token_utils.py
index 5302b8f..376245a 100644
--- a/tllm/generate/token_utils.py
+++ b/tllm/generate/token_utils.py
@@ -26,7 +26,7 @@ def preprocess(
             messages, tokenize=False, add_generation_prompt=add_generation_prompt
         )
         assert text is not None, "Either text or messages must be provided."
-        input_ids = self.tokenizer.encode(text, add_special_tokens=True)
+        input_ids = self.tokenizer.encode(text, add_special_tokens=False)
         return TokenizerResult(input_ids=input_ids, input_str=text)
 
     def preprocess_old(self, text: str = None, messages: List[List[Dict[str, str]]] = None) -> TokenizerResult:
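
A minimal sketch (not part of the patch) of why the flag is flipped: the text passed to encode() here is produced by tokenizer.apply_chat_template(), and chat templates typically render the model's special tokens themselves, so re-encoding with add_special_tokens=True can prepend a duplicate BOS token. The model id below is an arbitrary illustrative choice, not one used by tllm.

    from transformers import AutoTokenizer

    # Any chat model whose template emits its own special tokens will do; this id is illustrative.
    tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    messages = [{"role": "user", "content": "Hello"}]

    # apply_chat_template renders the prompt with the template's own special tokens included.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    with_flag = tokenizer.encode(text, add_special_tokens=True)      # may prepend a second BOS
    without_flag = tokenizer.encode(text, add_special_tokens=False)  # ids match the rendered template

    print(with_flag[:3])
    print(without_flag[:3])

With add_special_tokens=False the encoded ids correspond exactly to the string the chat template produced, which is the behavior the patched preprocess() relies on.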