Commit
fix special_tokens loading in llama3; add and remove some comments
yvonwin committed Apr 29, 2024
1 parent dac5306 commit 5810160
Showing 2 changed files with 8 additions and 13 deletions.
qwen.cpp: 20 changes (7 additions & 13 deletions)
@@ -324,6 +324,7 @@ auto RMSNorm::forward(ModelContext *ctx, ggml_tensor *input, float eps) const ->


// ===== Tokenizer =====
+ // Consider moving this code block to the 'tokenizer' directory for better organization.

// parse tiktoken file
static std::pair<std::string, int> _parse(const std::string &line) {
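For context, each line of a tiktoken vocabulary file pairs a base64-encoded token with its integer rank. Below is a minimal parsing sketch under that assumption; the helper name is illustrative rather than the repository's actual _parse, and the base64 decode step is only noted in a comment.

```cpp
#include <stdexcept>
#include <string>
#include <utility>

// Hypothetical helper: split "<base64-token> <rank>" into its two fields.
// A real parser would also base64-decode the token before using it.
static std::pair<std::string, int> parse_tiktoken_line(const std::string &line) {
    const auto pos = line.rfind(' ');
    if (pos == std::string::npos) {
        throw std::runtime_error("malformed tiktoken line: " + line);
    }
    std::string encoded_token = line.substr(0, pos); // still base64-encoded
    int rank = std::stoi(line.substr(pos + 1));      // token id / merge rank
    return {encoded_token, rank};
}
```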
@@ -360,8 +361,6 @@ QwenTokenizer::QwenTokenizer(const std::string & tiktoken_path, const QwenConfig
}
}

- // qwen
- // std::cout<< "init qwen tokenizer" << std::endl;
std::vector<std::string> special_tokens_s{"<|endoftext|>", "<|im_start|>", "<|im_end|>"};
char buffer[14];
for (size_t i = 0; i < 205; i++) { // 205 for extra control token
@@ -398,8 +397,6 @@ LlamaTokenizer::LlamaTokenizer(const std::string & tiktoken_path, const QwenConf
}
}

- //llama3
- // std::cout<< "init llama3 tokenizer" << std::endl;
std::vector<std::string> special_tokens_s{
"<|begin_of_text|>",
"<|end_of_text|>",
@@ -412,9 +409,9 @@ LlamaTokenizer::LlamaTokenizer(const std::string & tiktoken_path, const QwenConf
"<|reserved_special_token_4|>",
"<|eot_id|>", // end of turn
};
- char buffer[14];
- for (size_t i = 5; i < 250; i++) {
-   snprintf(buffer, 14, "<|reserved_special_token_%zu|>", i);
+ char buffer[31];
+ for (size_t i = 5; i < 251; i++) {
+   snprintf(buffer, 31, "<|reserved_special_token_%zu|>", i);
special_tokens_s.push_back(buffer);
}
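The buffer change above is the core fix: "<|reserved_special_token_250|>" is 30 characters, so it needs 31 bytes including the terminating NUL, and the loop bound must be 251 so that token 250 is generated. A standalone sketch of the corrected pattern (illustrative only, not the repository's code):

```cpp
#include <cstdio>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> special_tokens;
    // "<|reserved_special_token_250|>" is 30 characters, so 31 bytes are
    // needed to hold it plus the terminating '\0'; 14 would truncate it.
    char buffer[31];
    for (size_t i = 5; i < 251; i++) { // i < 251 so token 250 is included
        std::snprintf(buffer, sizeof(buffer), "<|reserved_special_token_%zu|>", i);
        special_tokens.push_back(buffer);
    }
    std::printf("generated %zu reserved tokens, last = %s\n",
                special_tokens.size(), special_tokens.back().c_str());
    return 0;
}
```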

@@ -464,7 +461,7 @@ std::string QwenTokenizer::build_prompt(const std::vector<ChatMessage> &messages

std::ostringstream oss_prompt;

- // chatml:
+ // chatml template example
// <|im_start|>system
// You are a helpful assistant.<|im_end|>
// <|im_start|>user
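A rough sketch of how a ChatML prompt like the one above can be assembled with std::ostringstream (simplified, with an assumed message struct; not the repository's build_prompt):

```cpp
#include <sstream>
#include <string>
#include <vector>

// Hypothetical message type for this sketch; the real ChatMessage differs.
struct Msg { std::string role; std::string content; };

static std::string build_chatml_prompt(const std::vector<Msg> &messages) {
    std::ostringstream oss;
    for (const auto &m : messages) {
        oss << "<|im_start|>" << m.role << "\n" << m.content << "<|im_end|>\n";
    }
    oss << "<|im_start|>assistant\n"; // leave the assistant turn open for generation
    return oss.str();
}
```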
@@ -493,7 +490,7 @@ std::string LlamaTokenizer::build_prompt(const std::vector<ChatMessage> &message

std::ostringstream oss_prompt;

- // llama3
+ // llama3 chat template example
// <|begin_of_text|><|start_header_id|>system<|end_header_id|>

// You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
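And the analogous sketch for the Llama 3 template above (again simplified, with an assumed message struct; not the actual LlamaTokenizer::build_prompt):

```cpp
#include <sstream>
#include <string>
#include <vector>

// Same hypothetical message type as in the ChatML sketch.
struct Msg3 { std::string role; std::string content; };

static std::string build_llama3_prompt(const std::vector<Msg3> &messages) {
    std::ostringstream oss;
    oss << "<|begin_of_text|>";
    for (const auto &m : messages) {
        oss << "<|start_header_id|>" << m.role << "<|end_header_id|>\n\n"
            << m.content << "<|eot_id|>";
    }
    oss << "<|start_header_id|>assistant<|end_header_id|>\n\n"; // model completes from here
    return oss.str();
}
```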
@@ -1083,8 +1080,7 @@ Llama3ForCausalLM::Llama3ForCausalLM(const Llama3Config &config)
ctx_.ctx_kv = make_unique_ggml_context(ctx_kv_size + 1 * MB, nullptr, false); // 1MB extra for MPS


- transformer = LlamaModel(&ctx_, config); // failed here
- // std::cout << "hello here2" << std::endl;
+ transformer = LlamaModel(&ctx_, config);
lm_head = Linear(&ctx_, config.hidden_size, config.vocab_size, false);


@@ -1181,7 +1177,6 @@ auto QwenForCausalLM::forward_graph_compute(const std::vector<int> &input_ids, i
lm_logits->backend = GGML_BACKEND_CPU;
// lm_logits->backend = GGML_BACKEND_TYPE_CPU; //newer ggml


ggml_build_forward_expand(ctx_.gf, lm_logits);
#ifdef GGML_USE_METAL
ggml_metal_graph_compute(ctx_.ctx_metal.get(), ctx_.gf);
@@ -1475,7 +1470,6 @@ auto Llama3ForCausalLM::forward(
}



// ===== pipeline =====

Pipeline::Pipeline(const std::string &path, const std::string &tiktoken_path, int max_length) {
qwen.h: 1 change (1 addition & 0 deletions)
@@ -257,6 +257,7 @@ struct GenerationConfig {
top_p(top_p), temperature(temperature), repetition_penalty(repetition_penalty), num_threads(num_threads) {}
};

+ // for sample
struct TokenIdScore {
int id;
float score;
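TokenIdScore pairs a token id with its logit/score during sampling. As an illustrative sketch of how such a struct is commonly consumed, here is a simple top-k selection (not the project's sampling code):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

struct TokenIdScore {
    int id;
    float score;
};

// Illustrative top-k: keep the k highest-scoring token ids.
static void keep_top_k(std::vector<TokenIdScore> &scores, size_t k) {
    if (k > scores.size()) k = scores.size();
    std::partial_sort(scores.begin(), scores.begin() + k, scores.end(),
                      [](const TokenIdScore &a, const TokenIdScore &b) {
                          return a.score > b.score; // descending by score
                      });
    scores.resize(k);
}

int main() {
    std::vector<TokenIdScore> scores{{0, 0.1f}, {1, 2.5f}, {2, -0.3f}, {3, 1.7f}};
    keep_top_k(scores, 2);
    for (const auto &s : scores) std::printf("id=%d score=%.2f\n", s.id, s.score);
    return 0;
}
```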
