Skip to content

Commit 5b853dd

Browse files
Small plugin bugfix
1 parent e0afb89 commit 5b853dd

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,10 @@ func (s *SLOAwareRouter) deleteSLOContextForRequest(request *schedulingtypes.LLM
107107

108108
func (t *SLOAwareRouter) PreRequest(ctx context.Context, request *schedulingtypes.LLMRequest, schedulingResult *schedulingtypes.SchedulingResult) {
109109
logger := log.FromContext(ctx)
110+
if request == nil {
111+
logger.V(logutil.DEBUG).Info("SLOAwareRouter.PreRequest: request is nil, skipping")
112+
return
113+
}
110114

111115
if schedulingResult == nil || len(schedulingResult.ProfileResults) == 0 {
112116
logger.V(logutil.TRACE).Info("SLOAwareRouter: Skipping PreRequest because no scheduling result was provided.")
@@ -157,6 +161,10 @@ func (t *SLOAwareRouter) PreRequest(ctx context.Context, request *schedulingtype
157161

158162
func (t *SLOAwareRouter) ResponseReceived(ctx context.Context, request *schedulingtypes.LLMRequest, response *requestcontrol.Response, targetPod *backend.Pod) {
159163
logger := log.FromContext(ctx)
164+
if request == nil {
165+
logger.V(logutil.DEBUG).Info("SLOAwareRouter.ResponseReceived: request is nil, skipping")
166+
return
167+
}
160168
if !t.checkPredictor(logger, targetPod) {
161169
return
162170
}
@@ -177,6 +185,10 @@ func (t *SLOAwareRouter) ResponseReceived(ctx context.Context, request *scheduli
177185

178186
func (t *SLOAwareRouter) ResponseStreaming(ctx context.Context, request *schedulingtypes.LLMRequest, response *requestcontrol.Response, pod *backend.Pod) {
179187
logger := log.FromContext(ctx)
188+
if request == nil {
189+
logger.V(logutil.DEBUG).Info("SLOAwareRouter.ResponseStreaming: request is nil, skipping")
190+
return
191+
}
180192
if !t.checkPredictor(logger, pod) || response.EndOfStream {
181193
return
182194
}
@@ -199,6 +211,10 @@ func (t *SLOAwareRouter) ResponseStreaming(ctx context.Context, request *schedul
199211

200212
func (t *SLOAwareRouter) ResponseComplete(ctx context.Context, request *schedulingtypes.LLMRequest, response *requestcontrol.Response, pod *backend.Pod) {
201213
logger := log.FromContext(ctx)
214+
if request == nil {
215+
logger.V(logutil.DEBUG).Info("SLOAwareRouter.ResponseComplete: request is nil, skipping")
216+
return
217+
}
202218
targetPod := pod
203219
if !t.checkPredictor(logger, targetPod) {
204220
return

pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer.go

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,6 +230,11 @@ func (s *SLOAwareRouter) Score(ctx context.Context, state *schedulingtypes.Cycle
230230

231231
s.parseSLOHeaders(ctx, request, sloCtx)
232232

233+
for _, pod := range pods {
234+
prefixCacheScore := s.getPrefixCacheScoreForPod(ctx, state, pod)
235+
sloCtx.prefixCacheScoresForPods[pod.GetPod().String()] = prefixCacheScore
236+
}
237+
233238
// Check if SLOs are provided
234239
if !sloCtx.predictorBasedScheduling {
235240
logger.V(logutil.DEBUG).Info("PredictorBasedScheduling turned off, skipping prediction-based filtering")
@@ -258,7 +263,7 @@ func (s *SLOAwareRouter) Score(ctx context.Context, state *schedulingtypes.Cycle
258263
allPreds, sticky := s.epsilonGreedyAffinityGate(ctx, allPreds, r, "overall", AffinityGateTauGlobal)
259264

260265
// Check if all pods are invalid and all have running requests
261-
allPodsInvalid := true
266+
allPodsInvalid := (sloCtx.ttftSLO > 0 && sloCtx.avgTPOTSLO > 0)
262267
allPodsHaveRunningRequests := true
263268

264269
for _, pred := range allPreds {

0 commit comments

Comments
 (0)