Commit

debug ci
cyber-pioneer committed Jan 22, 2025
1 parent a58c8b8 commit ee64a87
Showing 2 changed files with 22 additions and 22 deletions.
3 changes: 2 additions & 1 deletion flagscale/runner/runner_serve.py
@@ -81,6 +81,7 @@ def _generate_run_script_serve(
     root_dir = os.path.dirname(
         os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     )
+    vllm_dir = os.path.join(root_dir, "vllm")
     cmds_config = config.experiment.get("cmds", None)
     if cmds_config:
         before_start = cmds_config.get("before_start", "")
@@ -96,7 +97,7 @@ def _generate_run_script_serve(
         f.write(f"\n")
         f.write(f"cd {root_dir}\n")
         f.write(f"\n")
-        f.write(f"export PYTHONPATH={root_dir}\n")
+        f.write(f"export PYTHONPATH={vllm_dir}:{root_dir}\n")
         f.write(f"\n")
         f.write(f'cmd="{cmd}"\n')
         f.write(f"\n")
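
For orientation, a minimal standalone sketch of how the new vllm_dir value is derived and what the generated run script exports after this change; the /repo path below is a hypothetical placeholder, not part of the FlagScale source.

import os

# Hypothetical checkout location of flagscale/runner/runner_serve.py.
this_file = "/repo/flagscale/runner/runner_serve.py"

# Three dirname() calls walk from runner_serve.py up to the repository root.
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(this_file))))
vllm_dir = os.path.join(root_dir, "vllm")

# The run script now puts the repo-local vllm directory ahead of the repo root.
print(f"export PYTHONPATH={vllm_dir}:{root_dir}")  # export PYTHONPATH=/repo/vllm:/repo
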
41 changes: 20 additions & 21 deletions tests/functional_tests/test_cases/serve/build_dag/main.py
@@ -1,33 +1,32 @@
-# from vllm import LLM, SamplingParams
+from vllm import LLM, SamplingParams
 from custom.models import fn
 from flagscale.serve.core import auto_remote
 
 
-# @auto_remote(gpu=1)
-# class LLMActor:
-#     def __init__(self):
-#         # Initialize the LLM inside the actor to avoid serialization
-#         self.llm = LLM(
-#             model="/home/gitlab-runner/data/Qwen2.5-0.5B-Instruct",
-#             tensor_parallel_size=1,
-#             gpu_memory_utilization=0.5
-#         )
+@auto_remote(gpu=1)
+class LLMActor:
+    def __init__(self):
+        # Initialize the LLM inside the actor to avoid serialization
+        self.llm = LLM(
+            model="/home/gitlab-runner/data/Qwen2.5-0.5B-Instruct",
+            tensor_parallel_size=1,
+            gpu_memory_utilization=0.5
+        )
 
-#     def generate(self, prompt: str) -> str:
-#         sampling_params = SamplingParams(
-#             temperature=0.7,
-#             top_p=0.95,
-#             max_tokens=1000
-#         )
-#         result = self.llm.generate([prompt], sampling_params=sampling_params)
-#         return result[0].outputs[0].text
+    def generate(self, prompt: str) -> str:
+        sampling_params = SamplingParams(
+            temperature=0.7,
+            top_p=0.95,
+            max_tokens=1000
+        )
+        result = self.llm.generate([prompt], sampling_params=sampling_params)
+        return result[0].outputs[0].text
 
 
-# llm = LLMActor()
+llm = LLMActor()
 
 def model_A(prompt):
-    # result = llm.generate(prompt)
-    result = prompt + "__add_model_A"
+    result = llm.generate(prompt)
     return fn(result)
 
 
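
For reference, a standalone sketch of the vLLM call path that the re-enabled test now exercises, without the @auto_remote actor wrapper; the model id and prompt below are placeholders, while the test itself loads a local Qwen2.5-0.5B-Instruct checkpoint.

from vllm import LLM, SamplingParams

# Placeholder model id; the test case points at a local model path instead.
llm = LLM(
    model="Qwen/Qwen2.5-0.5B-Instruct",
    tensor_parallel_size=1,
    gpu_memory_utilization=0.5,
)

sampling_params = SamplingParams(temperature=0.7, top_p=0.95, max_tokens=1000)

# generate() takes a list of prompts and returns one RequestOutput per prompt.
outputs = llm.generate(["Say hello in one sentence."], sampling_params)
print(outputs[0].outputs[0].text)
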
