# config.yml: CircleCI configuration (forked from exo-explore/exo)
version: 2.1
orbs:
python: circleci/python@2
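# run_chatgpt_api_test boots a two-node exo cluster (listen/broadcast ports
# mirrored so the nodes discover each other), sends the same prompt to each
# node's ChatGPT-compatible endpoint, and greps both responses for the
# expected output.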
commands:
run_chatgpt_api_test:
parameters:
inference_engine:
type: string
model_id:
type: string
expected_output:
type: string
prompt:
type: string
steps:
- run:
name: Run ChatGPT API integration test (<<parameters.inference_engine>>, <<parameters.model_id>>)
command: |
source env/bin/activate
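# Give each node its own HF cache; node1 listens on 5678 and broadcasts on
# 5679 while node2 does the reverse, so the two instances can discover each other.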
# Start first instance
HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 2>&1 | tee output1.log &
PID1=$!
# Start second instance
HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 2>&1 | tee output2.log &
PID2=$!
# Wait for discovery
sleep 10
# Function to check if processes are still running
check_processes() {
if ! kill -0 $PID1 2>/dev/null; then
echo "First instance (PID $PID1) died unexpectedly. Log output:"
cat output1.log
exit 1
fi
if ! kill -0 $PID2 2>/dev/null; then
echo "Second instance (PID $PID2) died unexpectedly. Log output:"
cat output2.log
exit 1
fi
}
# Check processes before proceeding
check_processes
# Expected output is supplied per-job via the expected_output parameter
# ("dummy" for the dummy engine, "Michael Jackson" for the real models).
echo "Sending request to first instance..."
response_1=$(curl -s http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "<<parameters.model_id>>",
"messages": [{"role": "user", "content": "<<parameters.prompt>>"}],
"temperature": 0.7
}')
echo "Response 1: $response_1"
# Check processes after first response
check_processes
echo "Sending request to second instance..."
response_2=$(curl -s http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "<<parameters.model_id>>",
"messages": [{"role": "user", "content": "<<parameters.prompt>>"}],
"temperature": 0.7
}')
echo "Response 2: $response_2"
# Check processes after second response
check_processes
# Stop both instances
kill $PID1 $PID2
echo ""
if ! echo "$response_1" | grep -q "<<parameters.expected_output>>" || ! echo "$response_2" | grep -q "<<parameters.expected_output>>"; then
echo "Test failed: Response does not contain '<<parameters.expected_output>>'"
echo "Response 1: $response_1"
echo ""
echo "Response 2: $response_2"
echo "Output of first instance:"
cat output1.log
echo "Output of second instance:"
cat output2.log
exit 1
else
echo "Test passed: Response from both nodes contains '<<parameters.expected_output>>'"
fi
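# Jobs below run on macOS Xcode 16 images; the inference jobs pin the
# m2pro.large (Apple silicon) resource class.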
jobs:
unit_test:
macos:
xcode: "16.0.0"
resource_class: m2pro.large
steps:
- checkout
- run:
name: Set up Python
command: |
brew install python@3.12
python3.12 -m venv env
source env/bin/activate
- run:
name: Install dependencies
command: |
source env/bin/activate
pip install --upgrade pip
pip install .
- run:
name: Run tests
command: |
source env/bin/activate
# set TEMPERATURE to 0 for deterministic sampling
echo "Running inference engine tests..."
METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
echo "Running tokenizer tests..."
python3 ./test/test_tokenizers.py
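# Boots two exo nodes, waits for discovery, then checks each node's log for a
# peer status that reports the other node as connected and healthy.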
discovery_integration_test:
macos:
xcode: "16.0.0"
steps:
- checkout
- run:
name: Set up Python
command: |
brew install python@3.12
python3.12 -m venv env
source env/bin/activate
- run:
name: Install dependencies
command: |
source env/bin/activate
pip install --upgrade pip
pip install .
- run:
name: Run discovery integration test
command: |
source env/bin/activate
DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
PID1=$!
DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
PID2=$!
sleep 10
kill $PID1 $PID2
if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then
echo "Test passed: Both instances discovered each other"
exit 0
else
echo "Test failed: Devices did not discover each other"
echo "Output of first instance:"
cat output1.log
echo "Output of second instance:"
cat output2.log
exit 1
fi
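# End-to-end ChatGPT API test with the MLX inference engine and a small Llama model.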
chatgpt_api_integration_test_mlx:
macos:
xcode: "16.0.0"
resource_class: m2pro.large
steps:
- checkout
- run:
name: Set up Python
command: |
brew install python@3.12
python3.12 -m venv env
source env/bin/activate
- run:
name: Install dependencies
command: |
source env/bin/activate
pip install --upgrade pip
pip install .
- run_chatgpt_api_test:
inference_engine: mlx
model_id: llama-3.2-1b
prompt: "Keep responses concise. Who was the king of pop?"
expected_output: "Michael Jackson"
chatgpt_api_integration_test_dummy:
macos:
xcode: "16.0.0"
resource_class: m2pro.large
steps:
- checkout
- run:
name: Set up Python
command: |
brew install python@3.12
python3.12 -m venv env
source env/bin/activate
- run:
name: Install dependencies
command: |
source env/bin/activate
pip install --upgrade pip
pip install .
- run_chatgpt_api_test:
inference_engine: dummy
model_id: dummy-model
prompt: "Dummy prompt."
expected_output: "dummy"
test_macos_m1:
macos:
xcode: "16.0.0"
resource_class: m2pro.large
steps:
- checkout
- run: system_profiler SPHardwareDataType
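# The tinygrad variant of the ChatGPT API test below is currently disabled.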
# chatgpt_api_integration_test_tinygrad:
# macos:
# xcode: "16.0.0"
# resource_class: m2pro.large
# steps:
# - checkout
# - run:
# name: Set up Python
# command: |
# brew install python@3.12
# python3.12 -m venv env
# source env/bin/activate
# - run:
# name: Install dependencies
# command: |
# source env/bin/activate
# pip install --upgrade pip
# pip install .
# - run_chatgpt_api_test:
# inference_engine: tinygrad
# model_id: llama-3-8b
# prompt: "Keep responses concise. Who was the king of pop?"
# expected_output: "Michael Jackson"
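# Single workflow; all jobs run in parallel (no job dependencies declared).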
workflows:
version: 2
build_and_test:
jobs:
- unit_test
- discovery_integration_test
- chatgpt_api_integration_test_mlx
- chatgpt_api_integration_test_dummy
- test_macos_m1
# - chatgpt_api_integration_test_tinygrad