 from loguru import logger
 
 from guidellm.config import settings
-from guidellm.request.session import RequestSession
 from guidellm.request.types import (
     RequestT,
     ResponseT,
     SchedulerRequestResult,
     SchedulerResult,
     SchedulerRunInfo,
-    WorkerProcessRequestTime,
+    WorkerProcessRequest,
     WorkerProcessResult,
 )
 from guidellm.scheduler.strategy import SchedulingStrategy
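Note: the import swap above replaces the per-request timing record `WorkerProcessRequestTime` with a session-level `WorkerProcessRequest`. Its real definition lives in `guidellm.request.types`; the sketch below is only a hypothetical shape inferred from the constructor call in `_add_requests` later in this diff (field names come from that call, everything else is assumed).

```python
# Hypothetical sketch; the actual class ships in guidellm.request.types.
# Field names mirror the WorkerProcessRequest(...) call in _add_requests below.
import time
from dataclasses import dataclass, field
from typing import Any, Generic, TypeVar

RequestT = TypeVar("RequestT")
ResponseT = TypeVar("ResponseT")


@dataclass
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
    session: Any  # RequestSession[RequestT, ResponseT]; len(session) gives the turn count
    timeout_time: float  # absolute wall-clock deadline (run_info.end_time)
    queued_time: float = field(default_factory=time.time)  # when it was enqueued
```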
@@ -127,10 +126,14 @@ async def run(
             ) as executor,
         ):
             requests_iter: Optional[Iterator[Any]] = None
+            # TODO: Configurable delay and move somewhere more appropriate
+            scheduling_strategy.start_time = (
+                time.time()
+            )  # Add a small delay to allow processes to start
             futures, queues, stop_event = await self._start_processes(
                 manager, executor, scheduling_strategy
             )
-            run_info, requests_iter, times_iter = self._run_setup(
+            run_info, requests_iter = self._run_setup(
                 futures, scheduling_strategy, max_number, max_duration
             )
             yield SchedulerResult(
@@ -147,17 +150,16 @@ async def run(
 
                 if (
                     requests_iter is None
-                    and run_info.completed_requests >= run_info.created_requests
+                    # FIXME: Need new way to handle max requests
+                    # and run_info.completed_requests >= run_info.created_requests
                 ):
                     # we've exhausted all requests we've wanted to run
                     # and yielded all responses
                     break
 
                 requests_iter = self._add_requests(
                     requests_iter,
-                    times_iter,
                     queues.requests,
-                    queues.times,
                     run_info,
                 )
                 await asyncio.sleep(0)  # enable requests to start
@@ -196,7 +198,6 @@ async def _start_processes(
             requests=manager.Queue(
                 maxsize=scheduling_strategy.processing_requests_limit
             ),
-            times=manager.Queue(maxsize=scheduling_strategy.processing_requests_limit),
             responses=manager.Queue(),
         )
         stop_event = manager.Event()
@@ -229,10 +230,12 @@ async def _start_processes(
                     executor,
                     self.worker.process_loop_asynchronous,
                     queues,
+                    scheduling_strategy,
                     stop_event,
                     False,  # TODO: Make configurable
                     requests_limit,
                     id_,
+                    num_processes,
                 )
             )
 
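Note: the worker entry point now receives `scheduling_strategy` and `num_processes` alongside its `id_`, and the shared `times` queue is gone, so each worker presumably derives request start times itself from the strategy it was handed. The worker-side code is not part of this diff; the helper below is only a sketch of one plausible partitioning scheme (the function name and the striding are assumptions, not guidellm's API), built on `SchedulingStrategy.request_times()` and `start_time`, which do appear elsewhere in this change.

```python
import itertools
from typing import Iterator


def worker_request_times(scheduling_strategy, worker_id: int, num_processes: int) -> Iterator[float]:
    """Hypothetical: hand worker `worker_id` every num_processes-th start time
    from the strategy's global schedule, which begins at scheduling_strategy.start_time."""
    return itertools.islice(scheduling_strategy.request_times(), worker_id, None, num_processes)
```

Whether the real implementation stride-partitions a shared schedule like this or regenerates timings per process is not visible here.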
@@ -246,11 +249,9 @@ def _run_setup(
         scheduling_strategy: SchedulingStrategy,
         max_number: Optional[int],
         max_duration: Optional[float],
-    ) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
+    ) -> tuple[SchedulerRunInfo, Iterator[Any]]:
         requests_iter = iter(self.request_loader)
-        start_time = time.time()
-        times_iter = iter(scheduling_strategy.request_times())
-        end_time = time.time() + (max_duration or math.inf)
+        end_time = scheduling_strategy.start_time + (max_duration or math.inf)
         end_number = max_number or math.inf
 
         try:
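Note: `_run_setup` no longer samples `time.time()` itself; both the run info and the cut-off are derived from the `start_time` stamped on the strategy in `run()`. A standalone sketch of the deadline arithmetic, assuming the same `max_duration or math.inf` convention:

```python
import math
import time

start_time = time.time()  # what run() stores on scheduling_strategy.start_time
max_duration = 30.0  # seconds; None would leave the run unbounded (math.inf)
end_time = start_time + (max_duration or math.inf)

# _add_requests() stops queueing once the shared deadline has passed:
if time.time() >= end_time:
    print("deadline reached; stop adding requests")
```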
@@ -268,27 +269,28 @@ def _run_setup(
             )
 
         info = SchedulerRunInfo(
-            start_time=start_time,
+            start_time=scheduling_strategy.start_time,
             end_time=end_time,
             end_number=end_number,
             processes=len(processes),
             strategy=scheduling_strategy,
         )
 
-        return info, requests_iter, times_iter
+        return info, requests_iter
 
 
     def _add_requests(
         self,
         requests_iter: Optional[Iterator[Any]],
-        times_iter: Iterator[float],
-        requests_queue: Queue[RequestSession[RequestT, ResponseT]],
-        times_queue: Queue[WorkerProcessRequestTime],
+        requests_queue: Queue[WorkerProcessRequest[RequestT, ResponseT]],
         run_info: SchedulerRunInfo,
     ) -> Optional[Iterator[Any]]:
         if requests_iter is not None:
             try:
                 added_count = 0
 
+                if time.time() >= run_info.end_time:
+                    raise StopIteration
+
                 while (
                     not requests_queue.full()
                     and added_count < settings.max_add_requests_per_loop
@@ -297,23 +299,16 @@ def _add_requests(
                         raise StopIteration
 
                     session = next(requests_iter)
-                    requests_queue.put(session)
-                    for _ in range(len(session)):
-                        if (
-                            request_time := next(times_iter)
-                        ) >= run_info.end_time or time.time() >= run_info.end_time:
-                            raise StopIteration
-
-                        work_req = WorkerProcessRequestTime(
-                            start_time=request_time,
-                            timeout_time=run_info.end_time,
-                            queued_time=time.time(),
-                        )
-                        times_queue.put(work_req)
-
-                        run_info.created_requests += 1
-                        run_info.queued_requests += 1
-                        added_count += 1
+                    work_req = WorkerProcessRequest(
+                        session=session,
+                        timeout_time=run_info.end_time,
+                        queued_time=time.time(),
+                    )
+                    requests_queue.put(work_req)
+
+                    run_info.created_requests += len(session)
+                    run_info.queued_requests += len(session)
+                    added_count += len(session)
             except StopIteration:
                 # we've reached the limit number, limit time, or exhausted the requests
                 # set to None to stop adding more and tell the loop no more requests
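Note: a session of N turns previously expanded into one entry on the requests queue plus N timing records on the times queue, with the counters bumped once per turn; it now becomes a single `WorkerProcessRequest` and the counters advance by `len(session)` in one step. A self-contained sketch of that bookkeeping with a stand-in session object (the `FakeSession` class and the bare counters are illustrative only, mirroring `run_info`):

```python
import time
from dataclasses import dataclass
from queue import Queue


@dataclass
class FakeSession:
    """Stand-in for a multi-turn request session; len() is the number of turns."""
    turns: int

    def __len__(self) -> int:
        return self.turns


created_requests = queued_requests = added_count = 0
end_time = time.time() + 60.0
requests_queue: Queue = Queue()

session = FakeSession(turns=3)
work_req = {  # stands in for WorkerProcessRequest(session=..., timeout_time=..., queued_time=...)
    "session": session,
    "timeout_time": end_time,
    "queued_time": time.time(),
}
requests_queue.put(work_req)  # one queue item per session

# the run counters still track every turn the session will produce
created_requests += len(session)
queued_requests += len(session)
added_count += len(session)
print(created_requests, queued_requests, added_count)  # -> 3 3 3
```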