Add codeqwen.py and edit convert.py: a quick test for CodeQwen, which has switched its tokenizer to SentencePiece, so I test it through the Pybind11 bindings with the transformers tokenizer.

yvonwin committed Apr 18, 2024
1 parent 34a1c7b commit 361ae0a
Showing 2 changed files with 49 additions and 5 deletions.
44 changes: 44 additions & 0 deletions examples/codeqwen.py
@@ -0,0 +1,44 @@
# quick test for codeqwen: use transformers tokenizer
from transformers import AutoTokenizer
import qwen_cpp

device = "cpu" # the device to load the model onto

pipeline = qwen_cpp.Pipeline("../codeqwen2_7b-ggml.bin", "../qwen.tiktoken", 2048)
tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B-Chat")

prompt = "Write a quicksort algorithm in python."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
print(text)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

input_ids = model_inputs.input_ids.tolist()[0]

print(input_ids)

gen_config = qwen_cpp._C.GenerationConfig(
    max_length=2048,
    # max_new_tokens=args.max_new_tokens,
    max_context_length=512,
    do_sample=False,  # greedy decoding
    top_k=1,
    top_p=1,
    temperature=1,
    repetition_penalty=0.9,
    num_threads=0,
)

out_ids = pipeline._sync_generate_ids(input_ids, gen_config)
print(out_ids)

response = tokenizer.decode(out_ids, skip_special_tokens=True)

print(response)
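
For reference, the tokenize → generate → decode round trip above can be folded into a small helper. This is a minimal sketch reusing only the calls that appear in codeqwen.py; the chat() wrapper itself is illustrative, not part of the commit:

# Sketch: the example's flow as a reusable helper. `chat` is hypothetical;
# it uses no APIs beyond those already exercised in codeqwen.py above.
def chat(pipeline, tokenizer, gen_config, prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    input_ids = tokenizer([text], return_tensors="pt").input_ids.tolist()[0]
    out_ids = pipeline._sync_generate_ids(input_ids, gen_config)
    return tokenizer.decode(out_ids, skip_special_tokens=True)

# Usage with the objects defined above:
# print(chat(pipeline, tokenizer, gen_config, "Write a quicksort algorithm in python."))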
10 changes: 5 additions & 5 deletions qwen_cpp/convert.py
@@ -367,10 +367,10 @@ def dump_config(f, config, generation_config, tokenizer, ggml_type):
         config.num_hidden_layers,
         config.intermediate_size,
         config.seq_length,
-        config.bos_token_id if config.bos_token_id is not None else -1,
-        config.eos_token_id if config.eos_token_id is not None else -1,
-        config.pad_token_id if config.pad_token_id is not None else -1,
-        config.sep_token_id if config.sep_token_id is not None else -1,
+        generation_config.eos_token_id[0],  # eos_token_id[2, 4]
+        generation_config.pad_token_id,  # 92298
+        list(tokenizer.added_tokens_decoder.keys())[3],  # 3 <|im_start|>
+        list(tokenizer.added_tokens_decoder.keys())[4],  # 4 <|im_end|>
     ]
     f.write(struct.pack("i" * len(config_values), *config_values))
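
The replacement lines hardcode positions 3 and 4 of tokenizer.added_tokens_decoder for <|im_start|> and <|im_end|>. A quick way to sanity-check those indices (an inspection snippet, not part of the commit; it only assumes the Hugging Face tokenizer already used in the example above):

# Print each added special token with its positional index, to confirm that
# entries 3 and 4 really are <|im_start|> and <|im_end|> for CodeQwen1.5.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B-Chat")
for pos, (token_id, added) in enumerate(tok.added_tokens_decoder.items()):
    print(pos, token_id, added.content)

# If the added-token layout changes upstream, the hardcoded [3]/[4] lookups
# in dump_config would silently pick the wrong ids.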

@@ -431,7 +431,7 @@ def convert(f: BinaryIO, model_name_or_path: str, dtype: str = "q4_0"):
         Qwen2MOEConverter.convert(f, model, tokenizer, ggml_type)
     else:
         print('Warning: Qwen1 is not supported now')
-        # QwenConverter.convert(f, model, tokenizer, ggml_type)
+        QwenConverter.convert(f, model, tokenizer, ggml_type)


def main():
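For context, codeqwen.py loads a GGML file that this converter produces. A sketch of producing it by calling convert() directly, assuming the qwen_cpp package is importable; the function signature is the one shown in the hunk above, and the q4_0 dtype and output filename merely mirror the example script:

# Sketch: produce the GGML file that codeqwen.py expects, using the convert()
# function from the hunk above. Paths and dtype here are assumptions.
from qwen_cpp.convert import convert

with open("codeqwen2_7b-ggml.bin", "wb") as f:
    convert(f, "Qwen/CodeQwen1.5-7B-Chat", dtype="q4_0")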
