File tree 2 files changed +33
-0
lines changed 2 files changed +33
-0
lines changed Original file line number Diff line number Diff line change 9
9
export HFTOKEN="insert-your-huggingface-token-here"
10
10
helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
11
11
```
12
+
13
+ ## Deploy on CPU
14
+
15
+ ```
16
+ export HFTOKEN="insert-your-huggingface-token-here"
17
+ helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
18
+ ```
Original file line number Diff line number Diff line change
1
+ # Copyright (C) 2025 Intel Corporation
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Accelerate inferencing in heaviest components to improve performance
5
+ # by overriding their subchart values
6
+
7
+ vllm:
8
+   enabled: true
9
+   accelDevice: ""
10
+   resources:
11
+     limits:
12
+       habana.ai/gaudi: 0
13
+   image:
14
+     repository: opea/vllm
15
+   LLM_MODEL_ID: "meta-llama/Llama-3.2-3B-Instruct"
16
+   VLLM_CPU_KVCACHE_SPACE: 40
17
+   extraCmdArgs: ["--max-seq-len-to-capture", "16384"]
18
+ supervisor:
19
+   llm_endpoint_url: http://{{ .Release.Name }}-vllm
20
+   model: "meta-llama/Llama-3.2-3B-Instruct"
21
+ ragagent:
22
+   llm_endpoint_url: http://{{ .Release.Name }}-vllm
23
+   model: "meta-llama/Llama-3.2-3B-Instruct"
24
+ sqlagent:
25
+   llm_endpoint_url: http://{{ .Release.Name }}-vllm
26
+   model: "meta-llama/Llama-3.2-3B-Instruct"
You can’t perform that action at this time.
0 commit comments