From 361ae0a8083457f0bfaa42e49cb94ec6a9023165 Mon Sep 17 00:00:00 2001
From: yvonwin
Date: Thu, 18 Apr 2024 10:03:12 +0800
Subject: [PATCH] Add codeqwen.py and edit convert.py

CodeQwen has switched its tokenizer to SentencePiece, so I test it through
the pybind11 bindings: the script encodes the prompt with the transformers
tokenizer, feeds the raw ids to the pipeline, and decodes the result.
---
 examples/codeqwen.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 qwen_cpp/convert.py  | 10 +++++-----
 2 files changed, 49 insertions(+), 5 deletions(-)
 create mode 100644 examples/codeqwen.py

diff --git a/examples/codeqwen.py b/examples/codeqwen.py
new file mode 100644
index 0000000..7f77bf9
--- /dev/null
+++ b/examples/codeqwen.py
@@ -0,0 +1,44 @@
+# Quick test for CodeQwen: use the transformers tokenizer for encode/decode
+from transformers import AutoTokenizer
+import qwen_cpp
+
+device = "cpu"  # device for the tokenized inputs
+
+pipeline = qwen_cpp.Pipeline("../codeqwen2_7b-ggml.bin", "../qwen.tiktoken", 2048)
+tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B-Chat")
+
+prompt = "Write a quicksort algorithm in python."
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."},
+    {"role": "user", "content": prompt},
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+)
+print(text)
+model_inputs = tokenizer([text], return_tensors="pt").to(device)
+
+input_ids = model_inputs.input_ids.tolist()[0]
+
+print(input_ids)
+
+gen_config = qwen_cpp._C.GenerationConfig(
+    max_length=2048,
+    # max_new_tokens=args.max_new_tokens,
+    max_context_length=512,
+    do_sample=False,
+    top_k=1,
+    top_p=1,
+    temperature=1,
+    repetition_penalty=0.9,
+    num_threads=0,
+)
+
+out_ids = pipeline._sync_generate_ids(input_ids, gen_config)
+print(out_ids)
+
+response = tokenizer.decode(out_ids, skip_special_tokens=True)
+
+print(response)
diff --git a/qwen_cpp/convert.py b/qwen_cpp/convert.py
index 18e9f04..93654b4 100644
--- a/qwen_cpp/convert.py
+++ b/qwen_cpp/convert.py
@@ -367,10 +367,10 @@ def dump_config(f, config, generation_config, tokenizer, ggml_type):
         config.num_hidden_layers,
         config.intermediate_size,
         config.seq_length,
-        config.bos_token_id if config.bos_token_id is not None else -1,
-        config.eos_token_id if config.eos_token_id is not None else -1,
-        config.pad_token_id if config.pad_token_id is not None else -1,
-        config.sep_token_id if config.sep_token_id is not None else -1,
+        generation_config.eos_token_id[0],  # eos_token_id is a list ([2, 4]); take the first
+        generation_config.pad_token_id,  # 92298
+        list(tokenizer.added_tokens_decoder.keys())[3],  # added-token index 3: <|im_start|>
+        list(tokenizer.added_tokens_decoder.keys())[4],  # added-token index 4: <|im_end|>
     ]
     f.write(struct.pack("i" * len(config_values), *config_values))
 
@@ -431,7 +431,7 @@ def convert(f: BinaryIO, model_name_or_path: str, dtype: str = "q4_0"):
         Qwen2MOEConverter.convert(f, model, tokenizer, ggml_type)
     else:
         print('Warning: Qwen1 is not supported now')
-        # QwenConverter.convert(f, model, tokenizer, ggml_type)
+        QwenConverter.convert(f, model, tokenizer, ggml_type)
 
 
 def main():
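
Note on the hard-coded indices in dump_config: taking positions 3 and 4 of
tokenizer.added_tokens_decoder assumes a fixed ordering of the added tokens
in the Qwen/CodeQwen1.5-7B-Chat vocabulary. A minimal check of that
assumption, using only the transformers API:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B-Chat")
    # added_tokens_decoder maps token id -> AddedToken; list the entries so
    # the positions used above (3 -> <|im_start|>, 4 -> <|im_end|>) can be
    # confirmed before the ids are packed into the ggml header.
    for pos, (tok_id, added) in enumerate(tok.added_tokens_decoder.items()):
        print(pos, tok_id, added.content)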
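
On the example itself: the torch tensor round-trip exists only to pull the
id list back out, so the script carries a torch dependency it does not
otherwise need. A sketch of the equivalent torch-free encoding (same model
name as above; tokenizer.encode already returns a plain Python list):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B-Chat")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a quicksort algorithm in python."},
    ]
    text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_ids = tok.encode(text)  # list[int], ready for _sync_generate_ids
    print(input_ids)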