-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMakefile
More file actions
26 lines (24 loc) · 736 Bytes
/
Makefile
File metadata and controls
26 lines (24 loc) · 736 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# Both targets run server commands instead of producing files, so they are
# declared phony: make runs them even if a file with the same name exists.
.PHONY: vllm-server whisper-vllm
# vllm-server: start vLLM's OpenAI-compatible API server
# (python -m vllm.entrypoints.openai.api_server) with the
# Qwen/Qwen2.5-VL-3B-Instruct model, listening on 127.0.0.1:8000.
#
# Notes on the flags:
#   * All long options use the dashed spelling (e.g. --max-num-seqs) so the
#     command line is consistent throughout.
#   * --limit-mm-per-prompt allows at most one image and one video per prompt;
#     --mm-processor-kwargs passes max_pixels=65536 and fps=1 through to the
#     model's multimodal processor.
#   * NOTE(review): --allowed-local-media-path=/ appears to let requests
#     reference local media anywhere on the filesystem. Binding to 127.0.0.1
#     limits who can reach the server, but confirm that "/" is really intended
#     rather than a dedicated media directory.
#
# The whole command is one shell invocation; every line except the last ends in
# a backslash continuation.
vllm-server:
	python -m vllm.entrypoints.openai.api_server \
	  --model Qwen/Qwen2.5-VL-3B-Instruct \
	  --max-model-len 20000 \
	  --max-seq-len-to-capture 20000 \
	  --dtype bfloat16 \
	  --allowed-local-media-path=/ \
	  --limit-mm-per-prompt '{"image": 1, "video": 1}' \
	  --mm-processor-kwargs '{"max_pixels": 65536, "fps": 1}' \
	  --max-num-seqs 8 \
	  --port 8000 \
	  --host 127.0.0.1 \
	  --gpu-memory-utilization 0.7
# whisper-vllm: start vLLM's OpenAI-compatible API server
# (python -m vllm.entrypoints.openai.api_server) with the
# openai/whisper-large-v3-turbo model for the transcription task, listening on
# 127.0.0.1:9000.
#
# Notes:
#   * VLLM_USE_V1=0 is a shell prefix assignment, so it is set in the
#     environment of this one command only. Presumably it selects vLLM's
#     older engine implementation -- TODO confirm against the installed
#     vLLM version.
#   * --limit-mm-per-prompt "audio=1" allows at most one audio item per prompt.
#   * NOTE(review): --allowed-local-media-path=/ appears to let requests
#     reference local media anywhere on the filesystem; confirm that "/" is
#     intended even though the server only binds to 127.0.0.1.
#   * The final flag deliberately has no trailing backslash: the command ends
#     there, so lines added after this rule are not absorbed into the server's
#     argument list.
whisper-vllm:
	VLLM_USE_V1=0 python -m vllm.entrypoints.openai.api_server \
	  --model openai/whisper-large-v3-turbo \
	  --task transcription \
	  --allowed-local-media-path=/ \
	  --limit-mm-per-prompt "audio=1" \
	  --disable-log-requests \
	  --host 127.0.0.1 \
	  --port 9000