Skip to content

Commit 853f130

Browse files
Adding files to deploy SearchQnA application on ROCm vLLM (#1649)
Signed-off-by: Chingis Yundunov <[email protected]> Signed-off-by: Artem Astafev <[email protected]>
1 parent 340fa07 commit 853f130

File tree

11 files changed

+929
-150
lines changed

11 files changed

+929
-150
lines changed
Loading
Loading

SearchQnA/docker_compose/amd/gpu/rocm/README.md

Lines changed: 473 additions & 76 deletions
Large diffs are not rendered by default.

SearchQnA/docker_compose/amd/gpu/rocm/compose.yaml

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ services:
88
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
99
container_name: search-tei-embedding-server
1010
ports:
11-
- "3001:80"
11+
- "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80"
1212
volumes:
13-
- "${MODEL_PATH:-./data}:/data"
13+
- "${MODEL_CACHE:-./data}:/data"
1414
shm_size: 1g
1515
environment:
1616
no_proxy: ${no_proxy}
@@ -20,13 +20,14 @@ services:
2020
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
2121
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
2222
command: --model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate
23+
2324
search-embedding:
2425
image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
2526
container_name: search-embedding-server
2627
depends_on:
2728
- search-tei-embedding-service
2829
ports:
29-
- "3002:6000"
30+
- "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}:6000"
3031
ipc: host
3132
environment:
3233
no_proxy: ${no_proxy}
@@ -36,11 +37,12 @@ services:
3637
TEI_EMBEDDING_ENDPOINT: ${SEARCH_TEI_EMBEDDING_ENDPOINT}
3738
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
3839
restart: unless-stopped
40+
3941
search-web-retriever:
4042
image: ${REGISTRY:-opea}/web-retriever:${TAG:-latest}
4143
container_name: search-web-retriever-server
4244
ports:
43-
- "3003:7077"
45+
- "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}:7077"
4446
ipc: host
4547
environment:
4648
no_proxy: ${no_proxy}
@@ -50,26 +52,28 @@ services:
5052
GOOGLE_API_KEY: ${SEARCH_GOOGLE_API_KEY}
5153
GOOGLE_CSE_ID: ${SEARCH_GOOGLE_CSE_ID}
5254
restart: unless-stopped
55+
5356
search-tei-reranking-service:
5457
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
5558
container_name: search-tei-reranking-server
5659
ports:
57-
- "3004:80"
60+
- "${SEARCH_TEI_RERANKING_PORT:-3004}:80"
5861
volumes:
59-
- "${MODEL_PATH:-./data}:/data"
62+
- "${MODEL_CACHE:-./data}:/data"
6063
shm_size: 1g
6164
environment:
6265
no_proxy: ${no_proxy}
6366
http_proxy: ${http_proxy}
6467
https_proxy: ${https_proxy}
6568
command: --model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate
69+
6670
search-reranking:
6771
image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
6872
container_name: search-reranking-server
6973
depends_on:
7074
- search-tei-reranking-service
7175
ports:
72-
- "3005:8000"
76+
- "${SEARCH_RERANK_SERVICE_PORT:-3005}:8000"
7377
ipc: host
7478
environment:
7579
no_proxy: ${no_proxy}
@@ -80,13 +84,14 @@ services:
8084
HUGGING_FACE_HUB_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
8185
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
8286
restart: unless-stopped
87+
8388
search-tgi-service:
8489
image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
8590
container_name: search-tgi-service
8691
ports:
87-
- "3006:80"
92+
- "${SEARCH_TGI_SERVICE_PORT:-3006}:80"
8893
volumes:
89-
- "${MODEL_PATH:-./data}:/data"
94+
- "${MODEL_CACHE:-./data}:/data"
9095
environment:
9196
no_proxy: ${no_proxy}
9297
http_proxy: ${http_proxy}
@@ -96,7 +101,7 @@ services:
96101
shm_size: 1g
97102
devices:
98103
- /dev/kfd:/dev/kfd
99-
- /dev/dri/:/dev/dri/
104+
- /dev/dri:/dev/dri
100105
cap_add:
101106
- SYS_PTRACE
102107
group_add:
@@ -105,25 +110,26 @@ services:
105110
- seccomp:unconfined
106111
ipc: host
107112
command: --model-id ${SEARCH_LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
113+
108114
search-llm:
109115
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
110116
container_name: search-llm-server
111117
depends_on:
112118
- search-tgi-service
113119
ports:
114-
- "3007:9000"
120+
- "${SEARCH_LLM_SERVICE_PORT:-3007}:9000"
115121
ipc: host
116122
environment:
117123
no_proxy: ${no_proxy}
118124
http_proxy: ${http_proxy}
119125
https_proxy: ${https_proxy}
120-
TGI_LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
121126
HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
122127
LLM_ENDPOINT: ${SEARCH_TGI_LLM_ENDPOINT}
123128
LLM_MODEL_ID: ${SEARCH_LLM_MODEL_ID}
124129
LLM_MODEL: ${SEARCH_LLM_MODEL_ID}
125130
HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
126-
OPENAI_API_KEY: ${SEARCH_OPENAI_API_KEY}
131+
LLM_COMPONENT_NAME: "OpeaTextGenService"
132+
127133
restart: unless-stopped
128134
search-backend-server:
129135
image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
@@ -139,18 +145,18 @@ services:
139145
ports:
140146
- "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
141147
environment:
142-
- no_proxy=${no_proxy}
143-
- https_proxy=${https_proxy}
144-
- http_proxy=${http_proxy}
145-
- MEGA_SERVICE_HOST_IP=${SEARCH_MEGA_SERVICE_HOST_IP}
146-
- EMBEDDING_SERVICE_HOST_IP=${SEARCH_EMBEDDING_SERVICE_HOST_IP}
147-
- WEB_RETRIEVER_SERVICE_HOST_IP=${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
148-
- RERANK_SERVICE_HOST_IP=${SEARCH_RERANK_SERVICE_HOST_IP}
149-
- LLM_SERVICE_HOST_IP=${SEARCH_LLM_SERVICE_HOST_IP}
150-
- EMBEDDING_SERVICE_PORT=${SEARCH_EMBEDDING_SERVICE_PORT}
151-
- WEB_RETRIEVER_SERVICE_PORT=${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
152-
- RERANK_SERVICE_PORT=${SEARCH_RERANK_SERVICE_PORT}
153-
- LLM_SERVICE_PORT=${SEARCH_LLM_SERVICE_PORT}
148+
no_proxy: ${no_proxy}
149+
https_proxy: ${https_proxy}
150+
http_proxy: ${http_proxy}
151+
MEGA_SERVICE_HOST_IP: ${SEARCH_MEGA_SERVICE_HOST_IP}
152+
EMBEDDING_SERVICE_HOST_IP: ${SEARCH_EMBEDDING_SERVICE_HOST_IP}
153+
WEB_RETRIEVER_SERVICE_HOST_IP: ${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
154+
RERANK_SERVICE_HOST_IP: ${SEARCH_RERANK_SERVICE_HOST_IP}
155+
LLM_SERVICE_HOST_IP: ${SEARCH_LLM_SERVICE_HOST_IP}
156+
EMBEDDING_SERVICE_PORT: ${SEARCH_EMBEDDING_SERVICE_PORT}
157+
WEB_RETRIEVER_SERVICE_PORT: ${SEARCH_WEB_RETRIEVER_SERVICE_PORT}
158+
RERANK_SERVICE_PORT: ${SEARCH_RERANK_SERVICE_PORT}
159+
LLM_SERVICE_PORT: ${SEARCH_LLM_SERVICE_PORT}
154160
ipc: host
155161
restart: always
156162
search-ui-server:
@@ -161,10 +167,10 @@ services:
161167
ports:
162168
- "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
163169
environment:
164-
- no_proxy=${no_proxy}
165-
- https_proxy=${https_proxy}
166-
- http_proxy=${http_proxy}
167-
- BACKEND_BASE_URL=${SEARCH_BACKEND_SERVICE_ENDPOINT}
170+
no_proxy: ${no_proxy}
171+
https_proxy: ${https_proxy}
172+
http_proxy: ${http_proxy}
173+
BACKEND_BASE_URL: ${SEARCH_BACKEND_SERVICE_ENDPOINT}
168174
ipc: host
169175
restart: always
170176

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
5+
6+
services:
7+
# Text Embeddings Inference server started with SEARCH_EMBEDDING_MODEL_ID.
# Container port 80 is published on SEARCH_TEI_EMBEDDING_PORT (default 3001).
search-tei-embedding-service:
  container_name: search-tei-embedding-server
  image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
  command: "--model-id ${SEARCH_EMBEDDING_MODEL_ID} --auto-truncate"
  shm_size: 1g
  ports:
    - "${SEARCH_TEI_EMBEDDING_PORT:-3001}:80"
  volumes:
    # Host directory MODEL_CACHE (default ./data) is mounted at /data.
    - "${MODEL_CACHE:-./data}:/data"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
    # The same Hugging Face token is exposed under three variable names.
    HF_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
    HUGGING_FACE_HUB_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
    HUGGINGFACEHUB_API_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
23+
24+
# Embedding microservice; started after the TEI embedding server.
# Container port 6000 is published on SEARCH_EMBEDDING_SERVICE_PORT (default 3002).
search-embedding:
  container_name: search-embedding-server
  image: "${REGISTRY:-opea}/embedding:${TAG:-latest}"
  restart: unless-stopped
  ipc: host
  depends_on:
    - search-tei-embedding-service
  ports:
    - "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}:6000"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
    # Location of the TEI embedding server, given as host/port and as a full endpoint.
    TEI_EMBEDDING_HOST_IP: "${SEARCH_HOST_IP}"
    TEI_EMBEDDING_PORT: "${SEARCH_TEI_EMBEDDING_PORT}"
    TEI_EMBEDDING_ENDPOINT: "${SEARCH_TEI_EMBEDDING_ENDPOINT}"
    HF_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
41+
42+
# Web retriever microservice.
# Container port 7077 is published on SEARCH_WEB_RETRIEVER_SERVICE_PORT (default 3003).
search-web-retriever:
  container_name: search-web-retriever-server
  image: "${REGISTRY:-opea}/web-retriever:${TAG:-latest}"
  restart: unless-stopped
  ipc: host
  ports:
    - "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}:7077"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
    TEI_EMBEDDING_ENDPOINT: "${SEARCH_TEI_EMBEDDING_ENDPOINT}"
    # Google credentials; presumably for Programmable Search — confirm against the image.
    GOOGLE_API_KEY: "${SEARCH_GOOGLE_API_KEY}"
    GOOGLE_CSE_ID: "${SEARCH_GOOGLE_CSE_ID}"
56+
57+
# Text Embeddings Inference server started with SEARCH_RERANK_MODEL_ID.
# Container port 80 is published on SEARCH_TEI_RERANKING_PORT (default 3004).
search-tei-reranking-service:
  container_name: search-tei-reranking-server
  image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
  command: "--model-id ${SEARCH_RERANK_MODEL_ID} --auto-truncate"
  shm_size: 1g
  ports:
    - "${SEARCH_TEI_RERANKING_PORT:-3004}:80"
  volumes:
    # Host directory MODEL_CACHE (default ./data) is mounted at /data.
    - "${MODEL_CACHE:-./data}:/data"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
70+
71+
# Reranking microservice; started after the TEI reranking server.
# Container port 8000 is published on SEARCH_RERANK_SERVICE_PORT (default 3005).
search-reranking:
  container_name: search-reranking-server
  image: "${REGISTRY:-opea}/reranking:${TAG:-latest}"
  restart: unless-stopped
  ipc: host
  depends_on:
    - search-tei-reranking-service
  ports:
    - "${SEARCH_RERANK_SERVICE_PORT:-3005}:8000"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
    TEI_RERANKING_ENDPOINT: "${SEARCH_TEI_RERANKING_ENDPOINT}"
    # The same Hugging Face token is exposed under three variable names.
    HF_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
    HUGGING_FACE_HUB_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
    HUGGINGFACEHUB_API_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
88+
89+
# vLLM server (ROCm image) started with SEARCH_LLM_MODEL_ID.
# The server listens on container port 8011 (see --port in the command),
# published on SEARCH_VLLM_SERVICE_PORT (default 8081).
search-vllm-service:
  image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
  container_name: search-vllm-service
  ports:
    - "${SEARCH_VLLM_SERVICE_PORT:-8081}:8011"
  environment:
    no_proxy: ${no_proxy}
    http_proxy: ${http_proxy}
    https_proxy: ${https_proxy}
    HUGGINGFACEHUB_API_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
    HF_TOKEN: ${SEARCH_HUGGINGFACEHUB_API_TOKEN}
    # Numeric-looking values are quoted so they stay strings.
    HF_HUB_DISABLE_PROGRESS_BARS: "1"
    HF_HUB_ENABLE_HF_TRANSFER: "0"
    # Previously misspelled as WILM_USE_TRITON_FLASH_ATTENTION, so the
    # setting had no effect; this is the variable name vLLM reads.
    VLLM_USE_TRITON_FLASH_ATTN: "0"
    PYTORCH_JIT: "0"
  volumes:
    # Host directory MODEL_CACHE (default ./data) is mounted at /data.
    - "${MODEL_CACHE:-./data}:/data"
  shm_size: 20G
  devices:
    # GPU device nodes passed into the container.
    - /dev/kfd:/dev/kfd
    - /dev/dri:/dev/dri
  cap_add:
    - SYS_PTRACE
  group_add:
    - video
  security_opt:
    - seccomp:unconfined
    - apparmor=unconfined
  # Folded scalar: the lines below are joined with single spaces into one
  # command string, identical to the former single-line form.
  command: >-
    --model ${SEARCH_LLM_MODEL_ID}
    --swap-space 16
    --disable-log-requests
    --dtype float16
    --tensor-parallel-size 4
    --host 0.0.0.0
    --port 8011
    --num-scheduler-steps 1
    --distributed-executor-backend "mp"
  ipc: host
119+
120+
# LLM text-generation microservice; started after the vLLM server.
# Container port 9000 is published on SEARCH_LLM_SERVICE_PORT (default 3007).
search-llm:
  container_name: search-llm-server
  image: "${REGISTRY:-opea}/llm-textgen:${TAG:-latest}"
  restart: unless-stopped
  ipc: host
  depends_on:
    - search-vllm-service
  ports:
    - "${SEARCH_LLM_SERVICE_PORT:-3007}:9000"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
    # Model backend location and model identifier.
    LLM_ENDPOINT: "${SEARCH_LLM_ENDPOINT}"
    LLM_MODEL_ID: "${SEARCH_LLM_MODEL_ID}"
    LLM_COMPONENT_NAME: "OpeaTextGenService"
    # The same Hugging Face token is exposed under two variable names.
    HF_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
    HUGGINGFACEHUB_API_TOKEN: "${SEARCH_HUGGINGFACEHUB_API_TOKEN}"
138+
139+
# SearchQnA megaservice backend; started after every microservice it calls.
# Container port 8888 is published on SEARCH_BACKEND_SERVICE_PORT (default 3008).
search-backend-server:
  image: ${REGISTRY:-opea}/searchqna:${TAG:-latest}
  container_name: search-backend-server
  depends_on:
    - search-tei-embedding-service
    - search-embedding
    - search-web-retriever
    - search-tei-reranking-service
    - search-reranking
    - search-vllm-service
    - search-llm
  ports:
    - "${SEARCH_BACKEND_SERVICE_PORT:-3008}:8888"
  environment:
    no_proxy: ${no_proxy}
    https_proxy: ${https_proxy}
    http_proxy: ${http_proxy}
    MEGA_SERVICE_HOST_IP: ${SEARCH_MEGA_SERVICE_HOST_IP}
    # Each *_SERVICE_PORT falls back to the same default as the published
    # port of the matching service, so an unset variable no longer yields an
    # empty port while the service is reachable on its default.
    EMBEDDING_SERVICE_HOST_IP: ${SEARCH_EMBEDDING_SERVICE_HOST_IP}
    EMBEDDING_SERVICE_PORT: "${SEARCH_EMBEDDING_SERVICE_PORT:-3002}"
    WEB_RETRIEVER_SERVICE_HOST_IP: ${SEARCH_WEB_RETRIEVER_SERVICE_HOST_IP}
    WEB_RETRIEVER_SERVICE_PORT: "${SEARCH_WEB_RETRIEVER_SERVICE_PORT:-3003}"
    RERANK_SERVICE_HOST_IP: ${SEARCH_RERANK_SERVICE_HOST_IP}
    RERANK_SERVICE_PORT: "${SEARCH_RERANK_SERVICE_PORT:-3005}"
    LLM_SERVICE_HOST_IP: ${SEARCH_LLM_SERVICE_HOST_IP}
    LLM_SERVICE_PORT: "${SEARCH_LLM_SERVICE_PORT:-3007}"
  ipc: host
  restart: always
167+
168+
# Web UI; started after the backend server.
# Container port 5173 is published on SEARCH_FRONTEND_SERVICE_PORT (default 5173).
search-ui-server:
  container_name: search-ui-server
  image: "${REGISTRY:-opea}/searchqna-ui:${TAG:-latest}"
  restart: always
  ipc: host
  depends_on:
    - search-backend-server
  ports:
    - "${SEARCH_FRONTEND_SERVICE_PORT:-5173}:5173"
  environment:
    # Proxy settings are passed through from the invoking shell.
    http_proxy: "${http_proxy}"
    https_proxy: "${https_proxy}"
    no_proxy: "${no_proxy}"
    # Backend endpoint handed to the UI container.
    BACKEND_BASE_URL: "${SEARCH_BACKEND_SERVICE_ENDPOINT}"
182+
183+
# The project's default network uses the bridge driver.
networks:
  default:
    driver: bridge

0 commit comments

Comments
 (0)