-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_generation.sh
More file actions
40 lines (32 loc) · 962 Bytes
/
run_generation.sh
File metadata and controls
40 lines (32 loc) · 962 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# Launch a vLLM OpenAI-compatible server, run the InteractScience test
# driver (test_llm.py) against it, then shut the server down.
#
# Required on PATH: vllm, curl, python (with test_llm.py in the CWD).
set -euo pipefail

# Port for vLLM server
PORT=8000

echo "==== Starting ===="

model_path="model_path"
# Model tag used in the dump filename. NOTE(review): the original script
# referenced $MODEL without ever defining it, producing
# "eval/interactscience_.jsonl"; derive it from the model path's basename.
MODEL=${model_path##*/}

# Start vLLM API server in the background
vllm serve "$model_path" \
    --port "$PORT" \
    --tensor-parallel-size 8 \
    --max-model-len 32000 &
SERVER_PID=$!

# Kill the server on ANY exit path (test failure, Ctrl-C, timeout) so a
# failed run doesn't leave an orphaned vLLM process holding the GPUs.
cleanup() {
  kill "$SERVER_PID" 2>/dev/null || true
  wait "$SERVER_PID" 2>/dev/null || true
  echo "Server stopped."
}
trap cleanup EXIT

# Wait for the server to be ready, but bounded: bail out if the server
# process dies during startup or never becomes healthy.
echo "Waiting for the server to start..."
max_wait_secs=600
until curl -s "http://localhost:$PORT/v1/models" > /dev/null; do
  if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "vLLM server exited before becoming ready" >&2
    exit 1
  fi
  if (( max_wait_secs-- <= 0 )); then
    echo "Timed out waiting for the vLLM server" >&2
    exit 1
  fi
  sleep 1
done

echo "Server is up. Running the test script..."
# Run the test script
echo "Running tests for model: $MODEL"
python test_llm.py \
    --dataset_path "data/interactscience.jsonl" \
    --prompt_type "lm_system_prompt" \
    --dump_path "eval/interactscience_${MODEL}.jsonl" \
    --model_path "$model_path" \
    --base_url "http://localhost:$PORT/v1" \
    --api_key "EMPTY"

echo "Test completed. Shutting down the server..."
# Actual shutdown happens in the EXIT trap (cleanup).
echo "==============================="
echo