Commit
clean code
wnma3mz committed Jan 29, 2025
1 parent 5ac4b38 commit 2ac8c98
Showing 4 changed files with 13 additions and 103 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -11,13 +11,13 @@
 
 2. run server
 
-2.1 (no communication)
+2.1 (localhost)
 
 ```bash
 tllm.server --model_path mlx-community/Llama-3.2-1B-Instruct-4bit --hostname localhost --is_local --client_size 1
 ```
 
-2.2 (with communication)
+2.2 (multi clients)
 
 ```bash
 # first in one terminal
@@ -26,6 +26,8 @@
 # in another terminal
 tllm.client --hostname http://$YOUR_IP:8022
 ```
+
+
 3. testing
 
 ```bash
12 changes: 9 additions & 3 deletions examples/run_engine.py → run_engine.py
@@ -19,6 +19,7 @@ def parse_args():
         help="Attention backend if backend is TORCH",
     )
     parser.add_argument("--model_path", type=str, default="Qwen/Qwen2-VL-2B-Instruct")
+    parser.add_argument("--message_type", type=str, default="llm", choices=["llm", "mllm", "image"])
     return parser.parse_args()


@@ -131,6 +132,11 @@ async def image_generate(args):
 
 if __name__ == "__main__":
     args = parse_args()
-    asyncio.run(llm_generate(args, llm_message()))
-    # asyncio.run(llm_generate(args, mllm_message()))
-    # asyncio.run(image_generate(args))
+    if args.message_type == "llm":
+        asyncio.run(llm_generate(args, llm_message()))
+    elif args.message_type == "mllm":
+        asyncio.run(llm_generate(args, mllm_message()))
+    elif args.message_type == "image":
+        asyncio.run(image_generate(args))
+    else:
+        raise ValueError(f"Unknown message type: {args.message_type}")
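
The dispatch above replaces the old workflow of commenting calls in and out: the generation mode is now selected with the new `--message_type` flag. A minimal usage sketch, assuming the relocated script is invoked from the repository root and every other flag keeps the defaults shown in `parse_args()`:

```bash
# plain text generation (the default mode)
python run_engine.py --message_type llm

# multimodal generation with the default Qwen2-VL checkpoint
python run_engine.py --model_path Qwen/Qwen2-VL-2B-Instruct --message_type mllm

# image generation
python run_engine.py --message_type image
```

An unrecognized value now fails fast with a ValueError instead of silently running the default path.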
9 changes: 0 additions & 9 deletions tllm/generate/token_utils.py
@@ -29,15 +29,6 @@ def preprocess(
         input_ids = self.tokenizer.encode(text, add_special_tokens=False)
         return TokenizerResult(input_ids=input_ids, input_str=text)
 
-    def preprocess_old(self, text: str = None, messages: List[List[Dict[str, str]]] = None) -> TokenizerResult:
-        formatted_prompt = "### Human: {}### Assistant:"
-
-        if messages:
-            text = formatted_prompt.format(messages[0]["content"])
-        input_ids = self.tokenizer.encode(text, add_special_tokens=True)
-        while input_ids[0] == input_ids[1] == self.tokenizer.bos_token_id:
-            input_ids.pop(0)
-        return TokenizerResult(input_ids=input_ids, input_str=text)
 
     def decode(
         self, token_ids: List[int], cache_token_ids: List[Optional[List[int]]]
89 changes: 0 additions & 89 deletions tllm/shared_memory.py

This file was deleted.
