
Commit 6e8696c

Merge pull request #121 from sven-knoblauch/lora-modules
add changes for lora adapter support and /v1/models endpoint

2 parents b49e81a + 677a01e

File tree

1 file changed: +15 -6 lines changed


src/engine.py

Lines changed: 15 additions & 6 deletions
@@ -11,7 +11,8 @@
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest, CompletionRequest, ErrorResponse
-from vllm.entrypoints.openai.serving_engine import BaseModelPath
+from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath
+
 
 from utils import DummyRequest, JobInput, BatchSize, create_error_response
 from constants import DEFAULT_MAX_CONCURRENCY, DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE_GROWTH_FACTOR, DEFAULT_MIN_BATCH_SIZE
@@ -128,21 +129,32 @@ async def _initialize_engines(self):
         self.base_model_paths = [
             BaseModelPath(name=self.engine_args.model, model_path=self.engine_args.model)
         ]
+
+        lora_modules = os.getenv('LORA_MODULES', None)
+        if lora_modules is not None:
+            try:
+                lora_modules = json.loads(lora_modules)
+                lora_modules = [LoRAModulePath(**lora_modules)]
+            except:
+                lora_modules = None
+
+
+
         self.chat_engine = OpenAIServingChat(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
             response_role=self.response_role,
             chat_template=self.tokenizer.tokenizer.chat_template,
-            lora_modules=None,
+            lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None
         )
         self.completion_engine = OpenAIServingCompletion(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
-            lora_modules=[],
+            lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None
         )
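
The new block reads LoRA adapter configuration from a LORA_MODULES environment variable. As committed, it parses a single JSON object and wraps it in a one-element list, so only one adapter can be registered this way, and the bare except silently disables adapters on any parse failure. A minimal standalone sketch of that parsing, assuming vLLM's LoRAModulePath accepts name and path keyword arguments (the adapter name and path below are hypothetical examples):

import json
import os

from vllm.entrypoints.openai.serving_engine import LoRAModulePath

# Hypothetical value for illustration; the real name/path are deployment-specific:
#   export LORA_MODULES='{"name": "my-adapter", "path": "/adapters/my-adapter"}'
raw = os.getenv("LORA_MODULES", None)

lora_modules = None
if raw is not None:
    try:
        # Exactly one JSON object, wrapped in a list, as in the commit.
        lora_modules = [LoRAModulePath(**json.loads(raw))]
    except (json.JSONDecodeError, TypeError):
        # Parse failure falls back to "no adapters", mirroring the
        # commit's bare `except`.
        lora_modules = None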
@@ -158,9 +170,6 @@ async def generate(self, openai_request: JobInput):
 
     async def _handle_model_request(self):
         models = await self.chat_engine.show_available_models()
-        fixed_model = models.data[0]
-        fixed_model.id = self.served_model_name
-        models.data = [fixed_model]
         return models.model_dump()
 
     async def _handle_chat_or_completion_request(self, openai_request: JobInput):
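
With the hard-coded rewrite of models.data removed, _handle_model_request now returns vLLM's model list untouched, so the /v1/models endpoint can report registered LoRA adapters alongside the base model. A hedged sketch of what a caller might see, assuming an already-initialized engine instance (construction details are deployment-specific and omitted here):

import asyncio

async def list_models(engine):
    # `engine` is an initialized instance of the class patched above.
    models = await engine._handle_model_request()
    # OpenAI-style payload: {"object": "list", "data": [{"id": ...}, ...]}
    for model in models["data"]:
        print(model["id"])  # base model, plus any adapter from LORA_MODULES

# Usage (engine setup omitted):
# asyncio.run(list_models(engine))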
