-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy pathreference-cpu.yaml
More file actions
66 lines (65 loc) · 1.75 KB
/
reference-cpu.yaml
File metadata and controls
66 lines (65 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Copyright (C) 2024-2026 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# GMConnector resource describing the "docsum" pipeline for the xeon platform.
# The root node is a Sequence router over the steps listed below.
apiVersion: gmc.opea.io/v1alpha3
kind: GMConnector
metadata:
  labels:
    app.kubernetes.io/name: gmconnector
    app.kubernetes.io/managed-by: kustomize
    gmc/platform: xeon
  name: docsum
  namespace: docsum
spec:
  routerConfig:
    name: router
    serviceName: router-service
  nodes:
    root:
      routerType: Sequence
      steps:
        # First step: no `data` key is set here, unlike the steps after it.
        - name: TextExtractor
          dependency: Hard
          internalService:
            serviceName: text-extractor-svc
            config:
              endpoint: /v1/text_extractor

        # NOTE(review): `data: $response` presumably feeds this step with the
        # previous step's response -- confirm against the GMC documentation.
        - name: TextCompression
          dependency: Hard
          data: $response
          internalService:
            serviceName: text-compression-svc
            config:
              endpoint: /v1/text_compression

        - name: TextSplitter
          dependency: Hard
          data: $response
          internalService:
            serviceName: text-splitter-svc
            config:
              endpoint: /v1/text_splitter
              # Quoted so the values stay strings rather than integers.
              # NOTE(review): units (characters vs. tokens) are not visible
              # in this file -- confirm with the text-splitter service.
              CHUNK_SIZE: "3584"
              CHUNK_OVERLAP: "512"

        # NOTE(review): steps flagged `isDownstreamService: true` are
        # presumably backends reached by other services rather than stages
        # the router calls in sequence -- confirm against the GMC CRD.
        - name: VLLM
          dependency: Hard
          internalService:
            serviceName: vllm-service-m
            config:
              endpoint: /v1/completions
            isDownstreamService: true

        - name: Llm
          dependency: Hard
          internalService:
            serviceName: llm-svc
            config:
              endpoint: /v1/chat/completions
              LLM_MODEL_SERVER: vllm
              # Same value as the VLLM step's serviceName above.
              LLM_MODEL_SERVER_ENDPOINT: vllm-service-m
            isDownstreamService: true

        - name: DocSum
          dependency: Hard
          data: $response
          internalService:
            serviceName: docsum-svc
            config:
              endpoint: /v1/docsum
              # Same value as the Llm step's serviceName above.
              DOCSUM_LLM_USVC_ENDPOINT: llm-svc