Skip to content

Commit ceb8ca5

Browse files
author
mani
committed
[YUNIKORN-3139] Decrement 'preemptAttemptsRemaining' counter only when attempt has been made and not for precondition check failures (apache#1035)
Closes: apache#1035 Signed-off-by: mani <[email protected]>
1 parent fde27e5 commit ceb8ca5

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

pkg/common/errors.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ var (
3535

3636
// Constant messages for AllocationLog entries
3737
const (
38-
PreemptionPreconditionsFailed = "Preemption preconditions failed"
39-
PreemptionDoesNotGuarantee = "Preemption queue guarantees check failed"
40-
PreemptionShortfall = "Preemption helped but short of resources"
41-
PreemptionDoesNotHelp = "Preemption does not help"
42-
NoVictimForRequiredNode = "No fit on required node, preemption does not help"
38+
PreemptionPreconditionsFailed = "Preemption preconditions failed"
39+
PreemptionDoesNotGuarantee = "Preemption queue guarantees check failed"
40+
PreemptionShortfall = "Preemption helped but short of resources"
41+
PreemptionDoesNotHelp = "Preemption does not help"
42+
NoVictimForRequiredNode = "No fit on required node, preemption does not help"
43+
PreemptionMaxAttemptsExhausted = "Preemption max attempts exhausted"
4344
)

pkg/scheduler/objects/application.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,11 +1013,10 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption
10131013
// resource must fit in headroom otherwise skip the request (unless preemption could help)
10141014
if !headRoom.FitInMaxUndef(request.GetAllocatedResource()) {
10151015
// attempt preemption
1016-
if allowPreemption && *preemptAttemptsRemaining > 0 {
1017-
*preemptAttemptsRemaining--
1016+
if allowPreemption {
10181017
fullIterator := fullNodeIterator()
10191018
if fullIterator != nil {
1020-
if result, ok := sa.tryPreemption(headRoom, preemptionDelay, request, fullIterator, false); ok {
1019+
if result, ok := sa.tryPreemption(headRoom, preemptionDelay, preemptAttemptsRemaining, request, fullIterator, false); ok {
10211020
// preemption occurred, and possibly reservation
10221021
return result
10231022
}
@@ -1050,11 +1049,10 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption
10501049
}
10511050

10521051
// no nodes qualify, attempt preemption
1053-
if allowPreemption && *preemptAttemptsRemaining > 0 {
1054-
*preemptAttemptsRemaining--
1052+
if allowPreemption {
10551053
fullIterator := fullNodeIterator()
10561054
if fullIterator != nil {
1057-
if result, ok := sa.tryPreemption(headRoom, preemptionDelay, request, fullIterator, true); ok {
1055+
if result, ok := sa.tryPreemption(headRoom, preemptionDelay, preemptAttemptsRemaining, request, fullIterator, true); ok {
10581056
// preemption occurred, and possibly reservation
10591057
return result
10601058
}
@@ -1364,14 +1362,23 @@ func (sa *Application) tryReservedAllocate(headRoom *resources.Resource, nodeIte
13641362
return nil
13651363
}
13661364

1367-
func (sa *Application) tryPreemption(headRoom *resources.Resource, preemptionDelay time.Duration, ask *Allocation, iterator NodeIterator, nodesTried bool) (*AllocationResult, bool) {
1365+
func (sa *Application) tryPreemption(headRoom *resources.Resource, preemptionDelay time.Duration, preemptionAttemptsRemaining *int, ask *Allocation, iterator NodeIterator, nodesTried bool) (*AllocationResult, bool) {
1366+
if *preemptionAttemptsRemaining == 0 {
1367+
log.Log(log.SchedApplication).Debug("Max queue preemption attempts exhausted",
1368+
zap.String("allocationKey", ask.GetAllocationKey()),
1369+
zap.String("applicationID", sa.ApplicationID),
1370+
zap.String("queue", sa.queuePath))
1371+
ask.LogAllocationFailure(common.PreemptionMaxAttemptsExhausted, true)
1372+
return nil, false
1373+
}
13681374
preemptor := NewPreemptor(sa, headRoom, preemptionDelay, ask, iterator, nodesTried)
13691375

13701376
// validate prerequisites for preemption of an ask and mark ask for preemption if successful
13711377
if !preemptor.CheckPreconditions() {
13721378
ask.LogAllocationFailure(common.PreemptionPreconditionsFailed, true)
13731379
return nil, false
13741380
}
1381+
*preemptionAttemptsRemaining--
13751382

13761383
// track time spent trying preemption
13771384
tryPreemptionStart := time.Now()

pkg/scheduler/objects/application_test.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2237,17 +2237,30 @@ func TestTryAllocatePreemptQueue(t *testing.T) {
22372237
alloc2 := result2.Request
22382238
assert.Assert(t, alloc2 != nil, "alloc2 expected")
22392239

2240-
// on first attempt, not enough time has passed
2240+
// preemption max attempts exhausted
2241+
preemptionAttemptsRemaining = 0
22412242
result3 := app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 0}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
22422243
assert.Assert(t, result3 == nil, "result3 not expected")
22432244
assert.Assert(t, !alloc2.IsPreempted(), "alloc2 should not have been preempted")
2245+
log := ask3.GetAllocationLog()
2246+
assert.Equal(t, log[0].Message, common.PreemptionMaxAttemptsExhausted)
2247+
assert.Equal(t, preemptionAttemptsRemaining, 0)
2248+
2249+
preemptionAttemptsRemaining = 10
2250+
2251+
// on first attempt, not enough time has passed
2252+
result3 = app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 0}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2253+
assert.Assert(t, result3 == nil, "result3 not expected")
2254+
assert.Assert(t, !alloc2.IsPreempted(), "alloc2 should not have been preempted")
22442255
assertAllocationLog(t, ask3)
2256+
assert.Equal(t, preemptionAttemptsRemaining, 10)
22452257

22462258
// pass the time and try again
22472259
ask3.createTime = ask3.createTime.Add(-30 * time.Second)
22482260
result3 = app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 0}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
22492261
assert.Assert(t, result3 != nil && result3.Request != nil && result3.ResultType == Reserved, "alloc3 should be a reservation")
22502262
assert.Assert(t, alloc2.IsPreempted(), "alloc2 should have been preempted")
2263+
assert.Equal(t, preemptionAttemptsRemaining, 9)
22512264
}
22522265

22532266
func TestTryAllocatePreemptNode(t *testing.T) {

0 commit comments

Comments
 (0)