Commit 340fa07

Adding files to deploy Translation application on ROCm vLLM (#1648)
Signed-off-by: Chingis Yundunov <[email protected]>
Signed-off-by: Artem Astafev <[email protected]>
1 parent b7f2476 commit 340fa07

File tree

8 files changed (+699, -53 lines)


Translation/docker_compose/amd/gpu/rocm/README.md

+370-52
Large diffs are not rendered by default.

Translation/docker_compose/amd/gpu/rocm/compose.yaml

+1-1
@@ -90,7 +90,7 @@ services:
       - translation-backend-server
       - translation-ui-server
     ports:
-      - "${TRANSLATION_NGINX_PORT:-80}:80"
+      - "${TRANSLATION_NGINX_PORT:-80}:8080"
     environment:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
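
Only the container-side target of the nginx port mapping changes here (80 becomes 8080); the published host port is still taken from TRANSLATION_NGINX_PORT. A quick way to confirm the proxy still fronts the Translation megaservice after the change, as a minimal sketch assuming the stack is running and the variables from the environment script in this directory are exported:

# Sketch: probe the Translation megaservice through nginx on the published host port.
curl -s http://${HOST_IP}:${TRANSLATION_NGINX_PORT:-80}/v1/translation \
  -H 'Content-Type: application/json' \
  -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
# A healthy deployment returns a body containing "translation", as checked by the test script below.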

Translation/docker_compose/amd/gpu/rocm/compose_vllm.yaml

+107 (new file)

@@ -0,0 +1,107 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  translation-vllm-service:
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
    container_name: translation-vllm-service
    ports:
      - "${TRANSLATION_VLLM_SERVICE_PORT:-8081}:8011"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      WILM_USE_TRITON_FLASH_ATTENTION: 0
      PYTORCH_JIT: 0
    volumes:
      - "./data:/data"
    shm_size: 20G
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    command: "--model ${TRANSLATION_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 1 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
    ipc: host
  translation-llm:
    image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
    container_name: translation-llm-textgen-server
    depends_on:
      - translation-vllm-service
    ports:
      - "${TRANSLATION_LLM_PORT:-9000}:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${TRANSLATION_LLM_ENDPOINT}
      LLM_MODEL_ID: ${TRANSLATION_LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN}
      LLM_COMPONENT_NAME: "OpeaTextGenService"
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    restart: unless-stopped
  translation-backend-server:
    image: ${REGISTRY:-opea}/translation:${TAG:-latest}
    container_name: translation-backend-server
    depends_on:
      - translation-vllm-service
      - translation-llm
    ports:
      - "${TRANSLATION_BACKEND_SERVICE_PORT:-8888}:8888"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${TRANSLATION_MEGA_SERVICE_HOST_IP}
      LLM_SERVICE_HOST_IP: ${TRANSLATION_LLM_SERVICE_HOST_IP}
      LLM_SERVICE_PORT: ${TRANSLATION_LLM_PORT}
    ipc: host
    restart: always
  translation-ui-server:
    image: ${REGISTRY:-opea}/translation-ui:${TAG:-latest}
    container_name: translation-ui-server
    depends_on:
      - translation-backend-server
    ports:
      - "${TRANSLATION_FRONTEND_SERVICE_PORT:-5173}:5173"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      BASE_URL: ${TRANSLATION_BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always
  translation-nginx-server:
    image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
    container_name: translation-nginx-server
    depends_on:
      - translation-backend-server
      - translation-ui-server
    ports:
      - "${TRANSLATION_NGINX_PORT:-80}:8080"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      FRONTEND_SERVICE_IP: ${TRANSLATION_FRONTEND_SERVICE_IP}
      FRONTEND_SERVICE_PORT: ${TRANSLATION_FRONTEND_SERVICE_PORT}
      BACKEND_SERVICE_NAME: ${TRANSLATION_BACKEND_SERVICE_NAME}
      BACKEND_SERVICE_IP: ${TRANSLATION_BACKEND_SERVICE_IP}
      BACKEND_SERVICE_PORT: ${TRANSLATION_BACKEND_SERVICE_PORT}
    ipc: host
    restart: always
networks:
  default:
    driver: bridge
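
The vLLM engine serves an OpenAI-compatible API on port 8011 inside the container and is published on the host as ${TRANSLATION_VLLM_SERVICE_PORT} (8081 by default here; the environment script and test below use 8088); the llm-textgen microservice reaches it through TRANSLATION_LLM_ENDPOINT. A direct smoke test of the engine, mirroring the request used in the test script (a sketch, assuming the service is up and the model has finished loading):

# Sketch: query the vLLM completions endpoint directly on the published host port.
curl -s http://${HOST_IP}:${TRANSLATION_VLLM_SERVICE_PORT:-8081}/v1/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "haoranxu/ALMA-13B", "prompt": "What is Deep Learning?", "max_tokens": 100, "temperature": 0}'
# A healthy engine returns a JSON body containing a "choices" field.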

New file (+23): environment setup script for the ROCm vLLM deployment

@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# SPDX-License-Identifier: Apache-2.0

export HOST_IP=''
export EXTERNAL_HOST_IP=''
export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B"
export TRANSLATION_VLLM_SERVICE_PORT=8088
export TRANSLATION_LLM_ENDPOINT="http://${HOST_IP}:${TRANSLATION_VLLM_SERVICE_PORT}"
export TRANSLATION_LLM_PORT=9088
export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export TRANSLATION_MEGA_SERVICE_HOST_IP=${HOST_IP}
export TRANSLATION_LLM_SERVICE_HOST_IP=${HOST_IP}
export TRANSLATION_FRONTEND_SERVICE_IP=${HOST_IP}
export TRANSLATION_FRONTEND_SERVICE_PORT=18122
export TRANSLATION_BACKEND_SERVICE_NAME=translation
export TRANSLATION_BACKEND_SERVICE_IP=${HOST_IP}
export TRANSLATION_BACKEND_SERVICE_PORT=18121
export TRANSLATION_BACKEND_SERVICE_ENDPOINT="http://${EXTERNAL_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation"
export TRANSLATION_NGINX_PORT=18123
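
HOST_IP and EXTERNAL_HOST_IP are deliberately left empty and must be set to the host's address before sourcing, and TRANSLATION_HUGGINGFACEHUB_API_TOKEN is copied from an already-exported HUGGINGFACEHUB_API_TOKEN. A typical bring-up sequence with these variables, as a sketch (the filename set_env_vllm.sh is assumed; use whatever name this script is committed under):

# Sketch: bring up the ROCm vLLM Translation stack with the variables above.
export HUGGINGFACEHUB_API_TOKEN="your-hf-token"   # placeholder; needed to pull haoranxu/ALMA-13B
cd Translation/docker_compose/amd/gpu/rocm
# Fill in HOST_IP / EXTERNAL_HOST_IP inside the script first, then:
source ./set_env_vllm.sh                          # assumed filename for the script shown above
docker compose -f compose_vllm.yaml up -d
# The first start downloads the model; wait until vLLM reports readiness:
docker logs translation-vllm-service 2>&1 | grep "Application startup complete"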

Translation/docker_image_build/build.yaml

+5
@@ -29,3 +29,8 @@ services:
       dockerfile: comps/third_parties/nginx/src/Dockerfile
     extends: translation
     image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
+  vllm-rocm:
+    build:
+      context: GenAIComps
+      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
+    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
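
The new vllm-rocm entry builds the ROCm vLLM image from a GenAIComps checkout placed next to build.yaml, the same way the other third-party images are built. Building the images for this example, including the new one, looks roughly like this (a sketch mirroring the service list used by the test script below):

# Sketch: build the Translation images plus the new vllm-rocm image.
cd Translation/docker_image_build
git clone --depth 1 https://github.com/opea-project/GenAIComps.git
docker compose -f build.yaml build translation translation-ui llm-textgen nginx vllm-rocm --no-cache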

New file (+193): end-to-end test script for the ROCm vLLM deployment

@@ -0,0 +1,193 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    opea_branch=${opea_branch:-"main"}
    # If opea_branch isn't main, replace the git clone branch in the Dockerfiles.
    if [[ "${opea_branch}" != "main" ]]; then
        cd $WORKPATH
        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
        find . -type f -name "Dockerfile*" | while read -r file; do
            echo "Processing file: $file"
            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
        done
    fi

    cd $WORKPATH/docker_image_build
    git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git

    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    service_list="translation translation-ui llm-textgen nginx vllm-rocm"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
    docker images && sleep 3s
}

function start_services() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/

    export HOST_IP=${ip_address}
    export EXTERNAL_HOST_IP=${ip_address}
    export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B"
    export TRANSLATION_VLLM_SERVICE_PORT=8088
    export TRANSLATION_LLM_ENDPOINT="http://${HOST_IP}:${TRANSLATION_VLLM_SERVICE_PORT}"
    export TRANSLATION_LLM_PORT=9088
    export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export TRANSLATION_MEGA_SERVICE_HOST_IP=${HOST_IP}
    export TRANSLATION_LLM_SERVICE_HOST_IP=${HOST_IP}
    export TRANSLATION_FRONTEND_SERVICE_IP=${HOST_IP}
    export TRANSLATION_FRONTEND_SERVICE_PORT=5173
    export TRANSLATION_BACKEND_SERVICE_NAME=translation
    export TRANSLATION_BACKEND_SERVICE_IP=${HOST_IP}
    export TRANSLATION_BACKEND_SERVICE_PORT=8089
    export TRANSLATION_BACKEND_SERVICE_ENDPOINT="http://${EXTERNAL_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation"
    export TRANSLATION_NGINX_PORT=8090

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

    # Start Docker Containers
    docker compose -f compose_vllm.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

    n=0
    # Wait long enough for the LLM model download.
    until [[ "$n" -ge 500 ]]; do
        docker logs translation-vllm-service >& ${LOG_PATH}/translation-vllm-service_start.log
        if grep -q "Application startup complete" ${LOG_PATH}/translation-vllm-service_start.log; then
            echo "vLLM check successful"
            break
        fi
        sleep 10s
        n=$((n+1))
    done
}

function validate_services() {
    local URL="$1"
    local EXPECTED_RESULT="$2"
    local SERVICE_NAME="$3"
    local DOCKER_NAME="$4"
    local INPUT_DATA="$5"

    local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
    if [ "$HTTP_STATUS" -eq 200 ]; then
        echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."

        local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)

        if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
            echo "[ $SERVICE_NAME ] Content is as expected."
        else
            echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
            docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
            exit 1
        fi
    else
        echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
        docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
        exit 1
    fi
    sleep 1s
}

function validate_microservices() {
    # Check if the microservices are running correctly.

    # vLLM for the LLM service
    validate_services \
        "${ip_address}:${TRANSLATION_VLLM_SERVICE_PORT}/v1/completions" \
        "choices" \
        "translation-vllm-service" \
        "translation-vllm-service" \
        '{"model": "haoranxu/ALMA-13B", "prompt": "What is Deep Learning?", "max_tokens": 100, "temperature": 0}'

    # LLM microservice
    validate_services \
        "${HOST_IP}:${TRANSLATION_LLM_PORT}/v1/chat/completions" \
        "data: " \
        "translation-llm" \
        "translation-llm-textgen-server" \
        '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}'
}

function validate_megaservice() {
    # Curl the Mega Service directly
    validate_services \
        "${HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \
        "translation" \
        "translation-backend-server" \
        "translation-backend-server" \
        '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'

    # Test the megaservice via nginx
    validate_services \
        "${HOST_IP}:${TRANSLATION_NGINX_PORT}/v1/translation" \
        "translation" \
        "translation-nginx-server" \
        "translation-nginx-server" \
        '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}'
}

function validate_frontend() {
    cd $WORKPATH/ui/svelte
    local conda_env_name="OPEA_e2e"
    export PATH=${HOME}/miniconda3/bin/:$PATH
    if conda info --envs | grep -q "$conda_env_name"; then
        echo "$conda_env_name exists!"
    else
        conda create -n ${conda_env_name} python=3.12 -y
    fi
    source activate ${conda_env_name}

    sed -i "s/localhost/$ip_address/g" playwright.config.ts

    conda install -c conda-forge nodejs=22.6.0 -y
    npm install && npm ci && npx playwright install --with-deps
    node -v && npm -v && pip list

    exit_status=0
    npx playwright test || exit_status=$?

    if [ $exit_status -ne 0 ]; then
        echo "[TEST INFO]: ---------frontend test failed---------"
        exit $exit_status
    else
        echo "[TEST INFO]: ---------frontend test passed---------"
    fi
}

function stop_docker() {
    cd $WORKPATH/docker_compose/amd/gpu/rocm/
    docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f
}

function main() {
    stop_docker

    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
    start_services

    validate_microservices
    validate_megaservice
    validate_frontend

    stop_docker
    echo y | docker system prune
}

main
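
The test tears down any previous deployment, rebuilds the images when IMAGE_REPO is "opea", starts the stack from compose_vllm.yaml, then validates the vLLM engine, the llm-textgen microservice, the megaservice (directly and through nginx), and finally the Svelte UI via Playwright. Running it locally looks roughly like this (a sketch; the test filename and its location under Translation/tests are assumed, not part of the rendered diff):

# Sketch: run the end-to-end test for the ROCm vLLM deployment.
export HUGGINGFACEHUB_API_TOKEN="your-hf-token"   # placeholder; required for the model download
cd Translation/tests
bash test_compose_vllm_on_rocm.sh                 # assumed filename for the script shown above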
