diff --git a/AgentQnA/kubernetes/helm/README.md b/AgentQnA/kubernetes/helm/README.md
index 8d0cbc61e4..8247f65702 100644
--- a/AgentQnA/kubernetes/helm/README.md
+++ b/AgentQnA/kubernetes/helm/README.md
@@ -9,3 +9,10 @@
 export HFTOKEN="insert-your-huggingface-token-here"
 helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
 ```
+
+## Deploy on CPU
+
+```
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
+```
diff --git a/AgentQnA/kubernetes/helm/cpu-values.yaml b/AgentQnA/kubernetes/helm/cpu-values.yaml
new file mode 100644
index 0000000000..1def3bca9f
--- /dev/null
+++ b/AgentQnA/kubernetes/helm/cpu-values.yaml
@@ -0,0 +1,22 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+tgi:
+  enabled: false
+vllm:
+  enabled: true
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
+  extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
+
+supervisor:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
+ragagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
+sqlagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+  llm_engine: vllm
+  model: "meta-llama/Meta-Llama-3-8B-Instruct"
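
**Reviewer note:** the new CPU profile can be sanity-checked without a cluster by rendering the chart locally. Below is a minimal sketch, assuming Helm >= 3.8 (for OCI registry support) and that `cpu-values.yaml` sits in the current directory, as in the README snippet above; the `rendered.yaml` filename is just an illustrative choice.

```bash
# Render the chart with the CPU overrides instead of installing it.
helm template agentqna oci://ghcr.io/opea-project/charts/agentqna \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
  -f cpu-values.yaml > rendered.yaml

# With tgi.enabled=false, TGI resources should not appear in the output,
# while all three agents should reference the <release>-vllm endpoint.
grep -ci "tgi" rendered.yaml
grep -c "agentqna-vllm" rendered.yaml
```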
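**Reviewer note:** `cpu-values.yaml` points all three agents (`supervisor`, `ragagent`, `sqlagent`) at a single shared vLLM service named `{{ .Release.Name }}-vllm`, with tool calling enabled via `--enable-auto-tool-choice` and `--tool-call-parser llama3_json`. A quick runtime smoke test of that endpoint might look like the sketch below; vLLM's OpenAI-compatible `/v1/chat/completions` route is standard, but the service port (80 here) is an assumption about the chart, not something this diff pins down.

```bash
# Forward the in-cluster vLLM service to localhost; the service name
# follows the {{ .Release.Name }}-vllm pattern from cpu-values.yaml,
# and port 80 is an assumed chart default, so adjust if it differs.
kubectl port-forward svc/agentqna-vllm 8000:80 &

# Ask vLLM's OpenAI-compatible endpoint for a completion to confirm the
# model is serving; tool-call parsing only engages when tools are supplied.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "messages": [{"role": "user", "content": "Say hello."}]
      }'
```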