fix ci case

FlagOpen · Jan 23, 2025 · 3648863 · 3648863
1 parent a188707
commit 3648863
Show file tree

Hide file tree

Showing 7 changed files with 131 additions and 23 deletions.
diff --git a/tests/functional_tests/test_cases/serve/build_dag/main.py b/tests/functional_tests/test_cases/serve/build_dag/main.py
@@ -3,30 +3,8 @@
 from flagscale.serve.core import auto_remote
 
 
-@auto_remote(gpu=1)
-class LLMActor:
-    def __init__(self):
-        # Initialize the LLM inside the actor to avoid serialization
-        self.llm = LLM(
-            model="/home/gitlab-runner/data/Qwen2.5-0.5B-Instruct",
-            tensor_parallel_size=1,
-            gpu_memory_utilization=0.5
-        )
-
-    def generate(self, prompt: str) -> str:                    
-        sampling_params = SamplingParams(
-            temperature=0.7,
-            top_p=0.95,
-            max_tokens=1000
-        )
-        result = self.llm.generate([prompt], sampling_params=sampling_params)
-        return result[0].outputs[0].text
-
-
-llm = LLMActor()
-
 def model_A(prompt):
-    result = llm.generate(prompt)
+    result = prompt + "__add_model_A"
     return fn(result)
 
 

diff --git a/tests/functional_tests/test_cases/serve/build_models/call.sh b/tests/functional_tests/test_cases/serve/build_models/call.sh
@@ -0,0 +1 @@
+curl http://127.0.0.1:6701/generate -H "Content-Type: application/json" -d '{"prompt": "introduce bruce lee"}'
diff --git a/tests/functional_tests/test_cases/serve/build_models/conf/multiple_model.yaml b/tests/functional_tests/test_cases/serve/build_models/conf/multiple_model.yaml
@@ -0,0 +1,19 @@
+defaults:
+  - _self_
+  - serve: config
+
+experiment:
+  exp_name: multiple_model
+  exp_dir: outputs/${experiment.exp_name}
+  task:
+    type: serve
+    entrypoint: null
+  runner:
+    hostfile: null
+  envs:
+    CUDA_VISIBLE_DEVICES: 0,1,2,3
+    CUDA_DEVICE_MAX_CONNECTIONS: 1
+  cmds:
+    before_start: source /root/miniconda3/bin/activate flagscale
+
+action: run
diff --git a/tests/functional_tests/test_cases/serve/build_models/conf/serve/config.yaml b/tests/functional_tests/test_cases/serve/build_models/conf/serve/config.yaml
@@ -0,0 +1,38 @@
+deploy:
+  models:
+    A:
+      module: ./tests/functional_tests/test_cases/serve/build_dag/main.py 
+      name: model_A
+      resources:
+        gpu: 1 
+    B:
+      module: ./tests/functional_tests/test_cases/serve/build_dag/main.py
+      depends:
+        - A
+      name: model_B
+      resources:
+        gpu: 0
+    C:
+      module: ./tests/functional_tests/test_cases/serve/build_dag/custom/models.py
+      depends: 
+        - A
+      name: model_C
+      resources:
+        gpu: 0
+    D:
+      module: ./tests/functional_tests/test_cases/serve/build_dag/custom/models.py
+      depends:
+        - B
+        - C
+      name: model_D
+      resources:
+        gpu: 0
+
+  service:
+    name: /generate
+    port: 6701
+    request:
+      names:
+        - prompt
+      types:
+        - str
diff --git a/tests/functional_tests/test_cases/serve/build_models/custom/models.py b/tests/functional_tests/test_cases/serve/build_models/custom/models.py
@@ -0,0 +1,13 @@
+def fn(input_data):
+    res = input_data + "__add_process_fn"
+    return res
+
+
+def model_C(input_data):
+    res = input_data + "__add_model_C"
+    return res
+
+
+def model_D(input_data_B, input_data_C):
+    output_data = input_data_B + input_data_C
+    return output_data
diff --git a/tests/functional_tests/test_cases/serve/build_models/main.py b/tests/functional_tests/test_cases/serve/build_models/main.py
@@ -0,0 +1,40 @@
+from vllm import LLM, SamplingParams
+from custom.models import fn
+from flagscale.serve.core import auto_remote
+
+
+@auto_remote(gpu=1)
+class LLMActor:
+    def __init__(self):
+        # Initialize the LLM inside the actor to avoid serialization
+        self.llm = LLM(
+            model="/home/gitlab-runner/data/Qwen2.5-0.5B-Instruct",
+            tensor_parallel_size=1,
+            gpu_memory_utilization=0.5
+        )
+
+    def generate(self, prompt: str) -> str:                    
+        sampling_params = SamplingParams(
+            temperature=0.7,
+            top_p=0.95,
+            max_tokens=1000
+        )
+        result = self.llm.generate([prompt], sampling_params=sampling_params)
+        return result[0].outputs[0].text
+
+
+llm = LLMActor()
+
+def model_A(prompt):
+    result = llm.generate(prompt)
+    return fn(result)
+
+
+def model_B(input_data):
+    res = input_data + "__add_model_B"
+    return res
+
+
+if __name__ == "__main__":
+    prompt = "introduce Bruce Lee"
+    print(model_A(prompt))
diff --git a/tests/functional_tests/test_cases/serve/build_models/test_call.py b/tests/functional_tests/test_cases/serve/build_models/test_call.py
@@ -0,0 +1,19 @@
+import unittest
+import requests
+
+class TestAPI(unittest.TestCase):
+    def test_generate_endpoint(self):
+        url = "http://127.0.0.1:6701/generate"
+        headers = {"Content-Type": "application/json", "accept": "application/json"}
+        test_data = {"prompt": "Introduce BAAI."}
+
+        response = requests.post(url, headers=headers, json=test_data)
+
+        self.assertEqual(response.status_code, 200, 
+                        f"Expected status code 200, got {response.status_code}. Response: {response.text}")
+
+        self.assertGreater(len(response.text), 0,
+                          "Generated text should not be empty")
+
+if __name__ == '__main__':
+    unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		curl http://127.0.0.1:6701/generate -H "Content-Type: application/json" -d '{"prompt": "introduce bruce lee"}'