Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit d9a8641

Browse files
Support neural-chat-7b-v3 and neural-chat-7b-v3-1 (#698)
* Support neural-chat-7b-v3 and neural-chat-7b-v3-1 Signed-off-by: lvliang-intel <[email protected]>
1 parent e1cca32 commit d9a8641

File tree

7 files changed

+127
-12
lines changed

7 files changed

+127
-12
lines changed

intel_extension_for_transformers/neural_chat/models/model_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ def load_model(
362362
or re.search("llama", model_name, re.IGNORECASE)
363363
or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)
364364
or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
365+
or re.search("neural-chat-7b-v3", model_name, re.IGNORECASE)
365366
or re.search("qwen", model_name, re.IGNORECASE)
366367
or re.search("starcoder", model_name, re.IGNORECASE)
367368
or re.search("Mistral", model_name, re.IGNORECASE)
@@ -990,4 +991,12 @@ def predict(**params):
990991
output = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
991992
if "### Response:" in output:
992993
return output.split("### Response:")[1].strip()
994+
if "### Assistant:" in output:
995+
return output.split("### Assistant:")[1].strip()
996+
if "\nassistant\n" in output:
997+
return output.split("\nassistant\n")[1].strip()
998+
if "[/INST]" in output:
999+
return output.split("[/INST]")[1].strip()
1000+
if "答:" in output:
1001+
return output.split("答:")[1].strip()
9931002
return output

intel_extension_for_transformers/neural_chat/models/neuralchat_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
5151
"""
5252
if "neural-chat-7b-v2" in model_path.lower():
5353
return get_conv_template("neural-chat-7b-v2")
54+
elif "neural-chat-7b-v3" in model_path.lower():
55+
return get_conv_template("neural-chat-7b-v3")
5456
else:
5557
return get_conv_template("neural-chat-7b-v1-1")
5658

intel_extension_for_transformers/neural_chat/prompts/prompt.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,40 @@
1717

1818
from fastchat.conversation import get_conv_template, register_conv_template, Conversation, SeparatorStyle
1919

20+
# neuralchat-v3-1 prompt template
21+
register_conv_template(
22+
Conversation(
23+
name="neural-chat-7b-v3-1",
24+
system_message="""### System:
25+
- You are a helpful assistant chatbot trained by Intel.
26+
- You answer questions.
27+
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
28+
- You are more than just an information source, you are also able to write poetry, \
29+
short stories, and make jokes.</s>\n""",
30+
roles=("### User:", "### Assistant:"),
31+
sep_style=SeparatorStyle.NO_COLON_TWO,
32+
sep="\n",
33+
sep2="</s>",
34+
)
35+
)
36+
37+
# neuralchat-v3 prompt template
38+
register_conv_template(
39+
Conversation(
40+
name="neural-chat-7b-v3",
41+
system_message="""### System:
42+
- You are a helpful assistant chatbot trained by Intel.
43+
- You answer questions.
44+
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
45+
- You are more than just an information source, you are also able to write poetry, \
46+
short stories, and make jokes.</s>\n""",
47+
roles=("### User:", "### Assistant:"),
48+
sep_style=SeparatorStyle.NO_COLON_TWO,
49+
sep="\n",
50+
sep2="</s>",
51+
)
52+
)
53+
2054
# neuralchat-v2 prompt template
2155
register_conv_template(
2256
Conversation(

intel_extension_for_transformers/neural_chat/tests/ci/plugins/cache/test_cache.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from intel_extension_for_transformers.neural_chat.pipeline.plugins.caching.cache import ChatCache
1919
from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
2020
import unittest
21-
import os
21+
import os, shutil
2222

2323
class TestChatCache(unittest.TestCase):
2424
def setUp(self):
@@ -36,18 +36,17 @@ def tearDown(self) -> None:
3636
return super().tearDown()
3737

3838
def test_chat_cache(self):
39-
cache_plugin = ChatCache(embedding_model_dir="/tf_dataset2/models/nlp_toolkit/instructor-large")
39+
cache_plugin = ChatCache(embedding_model_dir="hkunlp/instructor-large")
4040
cache_plugin.init_similar_cache_from_config()
4141

42-
prompt = "Tell me about Intel Xeon Scable Processors."
42+
prompt = "Tell me about Intel Xeon Scalable Processors."
4343
config = PipelineConfig(model_name_or_path="facebook/opt-125m")
4444
chatbot = build_chatbot(config)
4545
response = chatbot.predict(prompt)
4646
cache_plugin.put(prompt, response)
4747

4848
answer = cache_plugin.get(prompt)
49-
self.assertIn('Tell me about Intel Xeon Scable Processors.', str(answer))
49+
self.assertIn('Intel Xeon Scalable', str(answer['choices'][0]['text']))
5050

51-
5251
if __name__ == "__main__":
5352
unittest.main()

intel_extension_for_transformers/neural_chat/tests/nightly/models/test_model.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from intel_extension_for_transformers.neural_chat.models.llama_model import LlamaModel
2020
from intel_extension_for_transformers.neural_chat.models.mpt_model import MptModel
2121
from intel_extension_for_transformers.neural_chat.models.neuralchat_model import NeuralChatModel
22+
from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
2223
import unittest
2324

2425
class TestChatGlmModel(unittest.TestCase):
@@ -29,12 +30,17 @@ def tearDown(self) -> None:
2930
return super().tearDown()
3031

3132
def test_match(self):
32-
result = ChatGlmModel().match(model_path='/tf_dataset2/models/nlp_toolkit/chatglm2-6b')
33+
result = ChatGlmModel().match(model_path='THUDM/chatglm2-6b')
3334
self.assertTrue(result)
3435

3536
def test_get_default_conv_template(self):
36-
result = ChatGlmModel().get_default_conv_template(model_path='/tf_dataset2/models/nlp_toolkit/chatglm-6b')
37+
result = ChatGlmModel().get_default_conv_template(model_path='THUDM/chatglm2-6b')
3738
self.assertIn('问', str(result))
39+
config = PipelineConfig(model_name_or_path="THUDM/chatglm2-6b")
40+
chatbot = build_chatbot(config=config)
41+
result = chatbot.predict("中国最大的城市是哪个?")
42+
print(result)
43+
self.assertIn('上海', str(result))
3844

3945
class TestLlamaModel(unittest.TestCase):
4046
def setUp(self):
@@ -44,12 +50,16 @@ def tearDown(self) -> None:
4450
return super().tearDown()
4551

4652
def test_match(self):
47-
result = LlamaModel().match(model_path='/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat')
53+
result = LlamaModel().match(model_path='meta-llama/Llama-2-7b-chat-hf')
4854
self.assertTrue(result)
4955

5056
def test_get_default_conv_template(self):
51-
result = LlamaModel().get_default_conv_template(model_path='/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat')
57+
result = LlamaModel().get_default_conv_template(model_path='meta-llama/Llama-2-7b-chat-hf')
5258
self.assertIn("[INST] <<SYS>>", str(result))
59+
chatbot = build_chatbot()
60+
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
61+
print(result)
62+
self.assertIn('Intel Xeon Scalable Processors', str(result))
5363

5464
class TestMptModel(unittest.TestCase):
5565
def setUp(self):
@@ -59,12 +69,17 @@ def tearDown(self) -> None:
5969
return super().tearDown()
6070

6171
def test_match(self):
62-
result = MptModel().match(model_path='/tf_dataset2/models/nlp_toolkit/mpt-7b')
72+
result = MptModel().match(model_path='mosaicml/mpt-7b-chat')
6373
self.assertTrue(result)
6474

6575
def test_get_default_conv_template(self):
66-
result = MptModel().get_default_conv_template(model_path='/tf_dataset2/models/nlp_toolkit/mpt-7b')
76+
result = MptModel().get_default_conv_template(model_path='mosaicml/mpt-7b-chat')
6777
self.assertIn("<|im_start|>system", str(result))
78+
config = PipelineConfig(model_name_or_path="mosaicml/mpt-7b-chat")
79+
chatbot = build_chatbot(config=config)
80+
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
81+
print(result)
82+
self.assertIn('Intel Xeon Scalable processors', str(result))
6883

6984
class TestNeuralChatModel(unittest.TestCase):
7085
def setUp(self):
@@ -81,10 +96,33 @@ def test_get_default_conv_template_v1(self):
8196
result = NeuralChatModel().get_default_conv_template(
8297
model_path='Intel/neural-chat-7b-v1-1')
8398
self.assertIn("<|im_start|>system", str(result))
99+
config = PipelineConfig(model_name_or_path="Intel/neural-chat-7b-v1-1")
100+
chatbot = build_chatbot(config=config)
101+
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
102+
print(result)
103+
self.assertIn('Intel® Xeon® Scalable processors', str(result))
84104

85105
def test_get_default_conv_template_v2(self):
86106
result = NeuralChatModel().get_default_conv_template(model_path='Intel/neural-chat-7b-v2')
87107
self.assertIn("### System:", str(result))
88108

109+
def test_get_default_conv_template_v3(self):
110+
result = NeuralChatModel().get_default_conv_template(model_path='Intel/neural-chat-7b-v3')
111+
self.assertIn("### System:", str(result))
112+
config = PipelineConfig(model_name_or_path="Intel/neural-chat-7b-v3")
113+
chatbot = build_chatbot(config=config)
114+
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
115+
print(result)
116+
self.assertIn('The Intel Xeon Scalable Processors', str(result))
117+
118+
def test_get_default_conv_template_v3_1(self):
119+
result = NeuralChatModel().get_default_conv_template(model_path='Intel/neural-chat-7b-v3-1')
120+
self.assertIn("### System:", str(result))
121+
config = PipelineConfig(model_name_or_path="Intel/neural-chat-7b-v3-1")
122+
chatbot = build_chatbot(config=config)
123+
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
124+
print(result)
125+
self.assertIn('The Intel Xeon Scalable Processors', str(result))
126+
89127
if __name__ == "__main__":
90128
unittest.main()

intel_extension_for_transformers/neural_chat/ui/textbot/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,8 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request:
322322

323323
if len(state.messages) == state.offset + 2:
324324
# model conversation name: "mpt-7b-chat", "chatglm", "chatglm2", "llama-2",
325-
# "neural-chat-7b-v2", "neural-chat-7b-v1-1"
325+
# "neural-chat-7b-v3-1", "neural-chat-7b-v3",
326+
# "neural-chat-7b-v2", "neural-chat-7b-v1-1"
326327
# First round of Conversation
327328
if "Llama-2-7b-chat-hf" in model_name:
328329
model_name = "llama-2"

intel_extension_for_transformers/neural_chat/ui/textbot/conversation.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,38 @@ def get_conv_template(name: str) -> Conversation:
946946
)
947947
)
948948

949+
# neuralchat-v3 template
950+
register_conv_template(
951+
Conversation(
952+
name="neural-chat-7b-v3",
953+
system_message="""### System:
954+
- You are a helpful assistant chatbot trained by Intel.
955+
- You answer questions.
956+
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
957+
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.</s>\n""",
958+
roles=("### User:", "### Assistant:"),
959+
sep_style=SeparatorStyle.NO_COLON_TWO,
960+
sep="\n",
961+
sep2="</s>",
962+
)
963+
)
964+
965+
# neuralchat-v3-1 template
966+
register_conv_template(
967+
Conversation(
968+
name="neural-chat-7b-v3-1",
969+
system_message="""### System:
970+
- You are a helpful assistant chatbot trained by Intel.
971+
- You answer questions.
972+
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
973+
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.</s>\n""",
974+
roles=("### User:", "### Assistant:"),
975+
sep_style=SeparatorStyle.NO_COLON_TWO,
976+
sep="\n",
977+
sep2="</s>",
978+
)
979+
)
980+
949981
# neuralchat-v1.1 prompt template
950982
register_conv_template(
951983
Conversation(

0 commit comments

Comments (0)