Fix vLLM CPU engine initialization issue for DeepSeek models (#1762)

lvliang-intel · web-flow · commit 7b7728c6c30f · 2025-04-09T09:47:08.000+08:00
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -96,6 +96,7 @@ services:
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
+      VLLM_CPU_KVCACHE_SPACE: "40"  # KV cache budget for the vLLM CPU backend; unit is GiB per vLLM docs (confirm host RAM)
     healthcheck:
       test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
       interval: 10s