fix ci case
cyber-pioneer committed Jan 23, 2025
1 parent a188707 commit 3648863
Showing 7 changed files with 131 additions and 23 deletions.
24 changes: 1 addition & 23 deletions tests/functional_tests/test_cases/serve/build_dag/main.py
@@ -3,30 +3,8 @@
from flagscale.serve.core import auto_remote


@auto_remote(gpu=1)
class LLMActor:
    def __init__(self):
        # Initialize the LLM inside the actor to avoid serialization
        self.llm = LLM(
            model="/home/gitlab-runner/data/Qwen2.5-0.5B-Instruct",
            tensor_parallel_size=1,
            gpu_memory_utilization=0.5
        )

    def generate(self, prompt: str) -> str:
        sampling_params = SamplingParams(
            temperature=0.7,
            top_p=0.95,
            max_tokens=1000
        )
        result = self.llm.generate([prompt], sampling_params=sampling_params)
        return result[0].outputs[0].text


llm = LLMActor()

def model_A(prompt):
    result = llm.generate(prompt)
    result = prompt + "__add_model_A"
    return fn(result)


@@ -0,0 +1 @@
curl http://127.0.0.1:8000/generate -H "Content-Type: application/json" -d '{"prompt": "introduce bruce lee"}'
@@ -0,0 +1,19 @@
defaults:
  - _self_
  - serve: config

experiment:
  exp_name: multiple_model
  exp_dir: outputs/${experiment.exp_name}
  task:
    type: serve
    entrypoint: null
  runner:
    hostfile: null
  envs:
    CUDA_VISIBLE_DEVICES: 0,1,2,3
    CUDA_DEVICE_MAX_CONNECTIONS: 1
  cmds:
    before_start: source /root/miniconda3/bin/activate flagscale

action: run
@@ -0,0 +1,38 @@
deploy:
  models:
    A:
      module: ./tests/functional_tests/test_cases/serve/build_dag/main.py
      name: model_A
      resources:
        gpu: 1
    B:
      module: ./tests/functional_tests/test_cases/serve/build_dag/main.py
      depends:
        - A
      name: model_B
      resources:
        gpu: 0
    C:
      module: ./tests/functional_tests/test_cases/serve/build_dag/custom/models.py
      depends:
        - A
      name: model_C
      resources:
        gpu: 0
    D:
      module: ./tests/functional_tests/test_cases/serve/build_dag/custom/models.py
      depends:
        - B
        - C
      name: model_D
      resources:
        gpu: 0

  service:
    name: /generate
    port: 6701
    request:
      names:
        - prompt
      types:
        - str
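
The deploy block above wires four named functions into a DAG: model_A feeds both model_B and model_C, and model_D joins their outputs; the service block exposes the pipeline as a /generate endpoint on port 6701. As a minimal local sketch of the expected data flow (not FlagScale's actual scheduler, and assuming the model_B in build_dag/main.py mirrors the one shown later in build_models/main.py), the composition boils down to:

# Hypothetical, driver-side simulation of the A -> (B, C) -> D pipeline.
# The real service dispatches these functions as distributed tasks.
def fn(input_data):
    return input_data + "__add_process_fn"

def model_A(prompt):
    # build_dag/main.py after this commit: tag the prompt, then post-process
    return fn(prompt + "__add_model_A")

def model_B(input_data):
    return input_data + "__add_model_B"

def model_C(input_data):
    return input_data + "__add_model_C"

def model_D(input_data_B, input_data_C):
    return input_data_B + input_data_C

a = model_A("introduce bruce lee")
print(model_D(model_B(a), model_C(a)))
# expected: the prompt tagged once through the B branch and once through the C branch, concatenated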
@@ -0,0 +1,13 @@
def fn(input_data):
    res = input_data + "__add_process_fn"
    return res


def model_C(input_data):
    res = input_data + "__add_model_C"
    return res


def model_D(input_data_B, input_data_C):
    output_data = input_data_B + input_data_C
    return output_data
40 changes: 40 additions & 0 deletions tests/functional_tests/test_cases/serve/build_models/main.py
@@ -0,0 +1,40 @@
from vllm import LLM, SamplingParams
from custom.models import fn
from flagscale.serve.core import auto_remote


@auto_remote(gpu=1)
class LLMActor:
    def __init__(self):
        # Initialize the LLM inside the actor to avoid serialization
        self.llm = LLM(
            model="/home/gitlab-runner/data/Qwen2.5-0.5B-Instruct",
            tensor_parallel_size=1,
            gpu_memory_utilization=0.5
        )

    def generate(self, prompt: str) -> str:
        sampling_params = SamplingParams(
            temperature=0.7,
            top_p=0.95,
            max_tokens=1000
        )
        result = self.llm.generate([prompt], sampling_params=sampling_params)
        return result[0].outputs[0].text


llm = LLMActor()

def model_A(prompt):
    result = llm.generate(prompt)
    return fn(result)


def model_B(input_data):
    res = input_data + "__add_model_B"
    return res


if __name__ == "__main__":
    prompt = "introduce Bruce Lee"
    print(model_A(prompt))
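
Both main.py variants rely on @auto_remote(gpu=1) from flagscale.serve.core, whose implementation is not part of this diff. Conceptually it turns LLMActor into a remote, GPU-pinned actor so the vLLM engine is constructed inside the worker process instead of being serialized from the driver (hence the comment in __init__). A rough sketch of that idea on top of Ray, offered purely as an assumption about the mechanism rather than FlagScale's real code:

# Hypothetical stand-in for flagscale.serve.core.auto_remote, assuming a
# Ray-based actor wrapper; the actual decorator may work differently.
import ray

def auto_remote(gpu=0):
    def wrap(cls):
        remote_cls = ray.remote(num_gpus=gpu)(cls)

        class Proxy:
            def __init__(self, *args, **kwargs):
                # Heavy state (e.g. the vLLM engine) is built inside the actor
                # process, so it never has to be pickled on the driver.
                self._actor = remote_cls.remote(*args, **kwargs)

            def __getattr__(self, name):
                method = getattr(self._actor, name)
                # Block on the result so call sites read like local method calls.
                return lambda *args, **kwargs: ray.get(method.remote(*args, **kwargs))

        return Proxy
    return wrap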
19 changes: 19 additions & 0 deletions tests/functional_tests/test_cases/serve/build_models/test_call.py
@@ -0,0 +1,19 @@
import unittest
import requests

class TestAPI(unittest.TestCase):
    def test_generate_endpoint(self):
        url = "http://127.0.0.1:6701/generate"
        headers = {"Content-Type": "application/json", "accept": "application/json"}
        test_data = {"prompt": "Introduce BAAI."}

        response = requests.post(url, headers=headers, json=test_data)

        self.assertEqual(response.status_code, 200,
            f"Expected status code 200, got {response.status_code}. Response: {response}")

        self.assertGreater(len(response.text), 0,
            "Generated text should not be empty")

if __name__ == '__main__':
    unittest.main()
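
test_call.py assumes a serve pipeline is already up and listening on 127.0.0.1:6701 (the corresponding build_models serve config is not shown in this diff); it only checks that /generate answers with HTTP 200 and a non-empty body. With the service running, it can be executed directly, e.g. python tests/functional_tests/test_cases/serve/build_models/test_call.py, or through python -m unittest with the same path.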
