From 5810160de985ed0a9fee31f6cbaea7bfed5bf836 Mon Sep 17 00:00:00 2001
From: yvonwin
Date: Mon, 29 Apr 2024 13:13:05 +0800
Subject: [PATCH] fix special_tokens loading in llama3; add and remove some comments

---
 qwen.cpp | 20 +++++++-------------
 qwen.h   |  1 +
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/qwen.cpp b/qwen.cpp
index c7a1e57..ad59661 100644
--- a/qwen.cpp
+++ b/qwen.cpp
@@ -324,6 +324,7 @@ auto RMSNorm::forward(ModelContext *ctx, ggml_tensor *input, float eps) const ->
 
 
 // ===== Tokenizer =====
 
+// Consider moving this code block to the 'tokenizer' directory for better organization.
 // parse tiktoken file
 static std::pair _parse(const std::string &line) {
@@ -360,8 +361,6 @@ QwenTokenizer::QwenTokenizer(const std::string & tiktoken_path, const QwenConfig
         }
     }
 
-    // qwen
-    // std::cout<< "init qwen tokenizer" << std::endl;
     std::vector special_tokens_s{"<|endoftext|>", "<|im_start|>", "<|im_end|>"};
     char buffer[14];
     for (size_t i = 0; i < 205; i++) { // 205 for extra control token
@@ -398,8 +397,6 @@ LlamaTokenizer::LlamaTokenizer(const std::string & tiktoken_path, const QwenConf
         }
     }
 
-    //llama3
-    // std::cout<< "init llama3 tokenizer" << std::endl;
     std::vector special_tokens_s{
         "<|begin_of_text|>",
         "<|end_of_text|>",
@@ -412,9 +409,9 @@ LlamaTokenizer::LlamaTokenizer(const std::string & tiktoken_path, const QwenConf
         "<|reserved_special_token_4|>",
         "<|eot_id|>", // end of turn
     };
-    char buffer[14];
-    for (size_t i = 5; i < 250; i++) {
-        snprintf(buffer, 14, "<|reserved_special_token_%zu|>", i);
+    char buffer[31];
+    for (size_t i = 5; i < 251; i++) {
+        snprintf(buffer, 31, "<|reserved_special_token_%zu|>", i);
         special_tokens_s.push_back(buffer);
     }
 
@@ -464,7 +461,7 @@ std::string QwenTokenizer::build_prompt(const std::vector &messages
 
     std::ostringstream oss_prompt;
 
-    // chatml:
+    // chatml template example
     // <|im_start|>system
     // You are a helpful assistant.<|im_end|>
     // <|im_start|>user
@@ -493,7 +490,7 @@ std::string LlamaTokenizer::build_prompt(const std::vector &message
 
     std::ostringstream oss_prompt;
 
-    // llama3
+    // llama3 chat template example
     // <|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
     // You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
@@ -1083,8 +1080,7 @@ Llama3ForCausalLM::Llama3ForCausalLM(const Llama3Config &config)
 
     ctx_.ctx_kv = make_unique_ggml_context(ctx_kv_size + 1 * MB, nullptr, false); // 1MB extra for MPS
 
-    transformer = LlamaModel(&ctx_, config); // failed here
-    // std::cout << "hello here2" << std::endl;
+    transformer = LlamaModel(&ctx_, config);
 
     lm_head = Linear(&ctx_, config.hidden_size, config.vocab_size, false);
 
@@ -1181,7 +1177,6 @@ auto QwenForCausalLM::forward_graph_compute(const std::vector &input_ids, i
 
     lm_logits->backend = GGML_BACKEND_CPU;
     // lm_logits->backend = GGML_BACKEND_TYPE_CPU; //newer ggml
-
     ggml_build_forward_expand(ctx_.gf, lm_logits);
 #ifdef GGML_USE_METAL
     ggml_metal_graph_compute(ctx_.ctx_metal.get(), ctx_.gf);
@@ -1475,7 +1470,6 @@ auto Llama3ForCausalLM::forward(
 }
 
 
-
 // ===== pipeline =====
 
 Pipeline::Pipeline(const std::string &path, const std::string &tiktoken_path, int max_length) {
diff --git a/qwen.h b/qwen.h
index 488c5c4..20d1bbe 100644
--- a/qwen.h
+++ b/qwen.h
@@ -257,6 +257,7 @@ struct GenerationConfig {
           top_p(top_p), temperature(temperature), repetition_penalty(repetition_penalty), num_threads(num_threads) {}
 };
 
+// for sampling
 struct TokenIdScore {
     int id;
     float score;
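
Note on the core fix: the old code wrote "<|reserved_special_token_%zu|>" into a 14-byte buffer, but the longest generated name, "<|reserved_special_token_250|>", is 30 characters plus the terminating NUL, hence buffer[31]; raising the loop bound from 250 to 251 generates reserved tokens 5 through 250, which together with the 10 explicitly listed specials yields the 256 special tokens the llama3 tokenizer expects. A minimal standalone sketch of that sizing follows (not part of qwen.cpp; only the token-name format and loop bounds come from the patch, everything else is illustrative):

    // reserved_tokens_sketch.cpp - illustrates the buffer sizing assumed by the patch
    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> special_tokens_s;
        // "<|reserved_special_token_" (25 chars) + up to 3 digits + "|>" (2 chars) + '\0' = 31 bytes
        char buffer[31];
        for (std::size_t i = 5; i < 251; i++) {  // reserved tokens 5..250, matching the patched loop
            std::snprintf(buffer, sizeof(buffer), "<|reserved_special_token_%zu|>", i);
            special_tokens_s.push_back(buffer);
        }
        // 246 generated names; the last one is the longest: <|reserved_special_token_250|>
        std::printf("%zu reserved tokens, last: %s\n",
                    special_tokens_s.size(), special_tokens_s.back().c_str());
        return 0;
    }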