
Commit 475b560

Improve dsq types
1 parent 0d92efb commit 475b560

3 files changed: +125, -97 lines

scheds/rust/scx_mitosis/src/bpf/dsq.bpf.h

Lines changed: 95 additions & 55 deletions
@@ -3,7 +3,7 @@
  * This software may be used and distributed according to the terms of the
  * GNU General Public License version 2.
  *
- * This header defines the 32-bit dispatch queue (DSQ) ID encoding
+ * This header defines the 64-bit dispatch queue (DSQ) ID encoding
  * scheme for scx_mitosis, using type fields to distinguish between
  * per-CPU and cell+L3 domain queues. It includes helper functions to
  * construct, validate, and parse these DSQ IDs for queue management.
@@ -37,94 +37,134 @@
  * Only the low 32 bits are used.
  *
  * [63 .. 32] [31..0]
- * [ 0s or unused ] [ VAL ]
+ * [ 0][ unused ] [ VAL ]
  *
  * Mitosis uses VAL as follows:
  *
- * [31..24] [23..0]
+ * [31..28] [27..0]
  * [QTYPE ] [DATA ]
  *
- * QTYPE encodes the queue type (exactly one bit set):
+ * QTYPE encodes the queue type:
  *
  * QTYPE = 0x1 -> Per-CPU Q
- * [31 .. 24] [23 .. 16] [15 .. 0]
- * [00000001] [00000000] [ CPU# ]
+ * [31..28]  [27 ..      .. 0]
+ * [ 0001 ]  [      CPU#     ]
  * [Q-TYPE:1]
  *
  * QTYPE = 0x2 -> Cell+L3 Q
- * [31 .. 24] [23 .. 16] [15 .. 0]
- * [00000010] [ CELL# ] [ L3ID ]
+ * [31..28]  [27 .. 16] [15 .. 0]
+ * [ 0010 ]  [ CELL#  ] [ L3ID  ]
  * [Q-TYPE:2]
  *
  */
 
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error "dsq64 bitfield layout assumes little-endian (bpfel)."
+#endif
+
+/* ---- Bitfield widths (bits) ---- */
+#define CPU_B 28
+#define L3_B 16
+#define CELL_B 12
+#define TYPE_B 4
+#define DATA_B 28
+#define RSVD_B 32
+
+/* Sum checks (in bits) */
+_Static_assert(CPU_B + TYPE_B == 32, "CPU layout low half must be 32 bits");
+_Static_assert(L3_B + CELL_B + TYPE_B == 32, "CELL+L3 layout low half must be 32 bits");
+_Static_assert(DATA_B + TYPE_B == 32, "Common layout low half must be 32 bits");
+
+typedef union {
+        u64 raw;
+
+        /* Per-CPU user DSQ */
+        struct { u64 cpu: CPU_B; u64 type: TYPE_B; u64 rsvd: RSVD_B; } cpu_dsq;
+
+        /* Cell+L3 user DSQ */
+        struct { u64 l3: L3_B; u64 cell: CELL_B; u64 type: TYPE_B; u64 rsvd: RSVD_B; } cell_l3_dsq;
+
+        /* Generic user view */
+        struct { u64 data: DATA_B; u64 type: TYPE_B; u64 rsvd: RSVD_B; } user_dsq;
+
+        /* Built-in DSQ view */
+        struct { u64 value:32; u64 rsvd:30; u64 local_on:1; u64 builtin:1; } builtin_dsq;
+
+        /*
+         * NOTE: There's an argument for removing both of these attributes.
+         * Packed ensures the compiler adds no padding between the fields,
+         * but it has the side effect of reducing alignment to 1. Right now
+         * it is redundant because the bitfields already fill exactly 64 bits,
+         * so the union is naturally 8-byte aligned. Maybe it's defensive,
+         * or maybe it's just noise.
+         */
+} __attribute__((packed, aligned(8))) dsq_id_t;
+
+/*
+ * Invalid DSQ ID sentinel:
+ * invalid because bit 63 is clear (it's a user DSQ) && dsq_type == 0 (no type).
+ * Good for catching uninitialized DSQ IDs.
+ */
+#define DSQ_INVALID ((dsq_id_t){ .raw = 0 })
+
+_Static_assert(sizeof(((dsq_id_t){0}).cpu_dsq) == sizeof(u64), "cpu view must be 8 bytes");
+_Static_assert(sizeof(((dsq_id_t){0}).cell_l3_dsq) == sizeof(u64), "cell+l3 view must be 8 bytes");
+_Static_assert(sizeof(((dsq_id_t){0}).user_dsq) == sizeof(u64), "user common view must be 8 bytes");
+_Static_assert(sizeof(((dsq_id_t){0}).builtin_dsq) == sizeof(u64), "builtin view must be 8 bytes");
+
+/* Compile-time checks (in bytes) */
+_Static_assert(sizeof(dsq_id_t) == sizeof(u64), "dsq64 must be 8 bytes (64 bits)");
+_Static_assert(_Alignof(dsq_id_t) == 8, "dsq64 must be 8-byte aligned");
+
+/* Range guards */
+_Static_assert(MAX_CPUS <= (1u << CPU_B), "MAX_CPUS must fit in field");
+_Static_assert(MAX_L3S <= (1u << L3_B), "MAX_L3S must fit in field");
+_Static_assert(MAX_CELLS <= (1u << CELL_B), "MAX_CELLS must fit in field");
+
 /* DSQ type enumeration */
 enum dsq_type {
-        DSQ_UNKNOWN,
+        DSQ_TYPE_NONE,
         DSQ_TYPE_CPU,
         DSQ_TYPE_CELL_L3,
 };
 
-/* DSQ ID structure using unions for type-safe access */
-struct dsq_cpu {
-        u32 cpu : 16;
-        u32 unused : 8;
-        u32 type : 8;
-} __attribute__((packed));
-
-struct dsq_cell_l3 {
-        u32 l3 : 16;
-        u32 cell : 8;
-        u32 type : 8;
-} __attribute__((packed));
-
-union dsq_id {
-        u32 raw;
-        struct dsq_cpu cpu;
-        struct dsq_cell_l3 cell_l3;
-        struct {
-                u32 data : 24;
-                u32 type : 8;
-        } common;
-} __attribute__((packed));
-
-/* Static assertions to ensure correct sizes */
-/* Verify that all DSQ structures are exactly 32 bits */
-_Static_assert(sizeof(struct dsq_cpu) == 4, "dsq_cpu must be 32 bits");
-_Static_assert(sizeof(struct dsq_cell_l3) == 4, "dsq_cell_l3 must be 32 bits");
-_Static_assert(sizeof(union dsq_id) == 4, "dsq_id union must be 32 bits");
-
-/* Inline helper functions for DSQ ID manipulation */
+/*
+ * While I considered error propagation, I decided to bail out with
+ * scx_bpf_error() to surface errors as early as possible.
+ */
+
+static inline bool is_user_dsq(dsq_id_t dsq_id)
+{
+        return !dsq_id.builtin_dsq.builtin;
+}
 
 // Is this a per-CPU DSQ?
-static inline bool is_cpu_dsq(u32 dsq_id)
+static inline bool is_cpu_dsq(dsq_id_t dsq_id)
 {
-        union dsq_id id = { .raw = dsq_id };
-        return id.common.type == DSQ_TYPE_CPU;
+        return is_user_dsq(dsq_id) && dsq_id.user_dsq.type == DSQ_TYPE_CPU;
 }
 
 // If this is a per-CPU DSQ, return the CPU
-static inline u32 get_cpu_from_dsq(u32 dsq_id)
+static inline u32 get_cpu_from_dsq(dsq_id_t dsq_id)
 {
-        union dsq_id id = { .raw = dsq_id };
-        if (id.common.type != DSQ_TYPE_CPU)
-                return DSQ_ERROR;
-        return id.cpu.cpu;
+        if (!is_cpu_dsq(dsq_id))
+                scx_bpf_error("trying to get cpu from non-cpu dsq\n");
+
+        return dsq_id.cpu_dsq.cpu;
 }
 
 /* Helper functions to construct DSQ IDs */
-static inline u32 get_cpu_dsq_id(u32 cpu)
+static inline dsq_id_t get_cpu_dsq_id(u32 cpu)
 {
+        // Check for valid CPU range; 0-indexed, so >=.
         if (cpu >= MAX_CPUS)
-                return DSQ_ERROR;
-        union dsq_id id = { .cpu = { .cpu = cpu, .unused = 0, .type = DSQ_TYPE_CPU } };
-        return id.raw;
+                scx_bpf_error("invalid cpu %d\n", cpu);
+
+        return (dsq_id_t){ .cpu_dsq = { .cpu = cpu, .type = DSQ_TYPE_CPU } };
 }
 
-static inline u32 get_cell_l3_dsq_id(u32 cell, u32 l3)
+static inline dsq_id_t get_cell_l3_dsq_id(u32 cell, u32 l3)
 {
         if (cell >= MAX_CELLS || l3 >= MAX_L3S)
-                return DSQ_ERROR;
-        union dsq_id id = { .cell_l3 = { .l3 = l3, .cell = cell, .type = DSQ_TYPE_CELL_L3 } };
-        return id.raw;
+                scx_bpf_error("cell %d or l3 %d too large\n", cell, l3);
+
+        return (dsq_id_t){ .cell_l3_dsq = { .l3 = l3, .cell = cell, .type = DSQ_TYPE_CELL_L3 } };
 }
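
For illustration only (not part of the commit): a minimal user-space sketch of how the new 64-bit layout packs its fields, assuming the usual little-endian bitfield allocation that the #error guard above enforces. The widths mirror CPU_B/CELL_B/L3_B/TYPE_B; the CPU, cell, and L3 numbers below are made-up worked examples, not values from scx_mitosis.

/* Hypothetical host-side sketch; compile with any little-endian C compiler. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

enum dsq_type { DSQ_TYPE_NONE, DSQ_TYPE_CPU, DSQ_TYPE_CELL_L3 };

typedef union {
        u64 raw;
        struct { u64 cpu : 28; u64 type : 4; u64 rsvd : 32; } cpu_dsq;
        struct { u64 l3 : 16; u64 cell : 12; u64 type : 4; u64 rsvd : 32; } cell_l3_dsq;
        struct { u64 value : 32; u64 rsvd : 30; u64 local_on : 1; u64 builtin : 1; } builtin_dsq;
} dsq_id_t;

int main(void)
{
        /* Per-CPU queue for CPU 5: type nibble 0x1 in bits [31:28], CPU# in [27:0]. */
        dsq_id_t cpu_q = { .cpu_dsq = { .cpu = 5, .type = DSQ_TYPE_CPU } };
        assert(cpu_q.raw == 0x10000005ULL);

        /* Cell 3 + L3 7: type nibble 0x2, CELL# in [27:16], L3ID in [15:0]. */
        dsq_id_t cell_q = { .cell_l3_dsq = { .l3 = 7, .cell = 3, .type = DSQ_TYPE_CELL_L3 } };
        assert(cell_q.raw == 0x20030007ULL);

        /* Bit 63 is clear on both, so the builtin view reports a user DSQ. */
        assert(!cpu_q.builtin_dsq.builtin && !cell_q.builtin_dsq.builtin);

        printf("cpu_q=0x%llx cell_q=0x%llx\n",
               (unsigned long long)cpu_q.raw, (unsigned long long)cell_q.raw);
        return 0;
}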

scheds/rust/scx_mitosis/src/bpf/mitosis.bpf.c

Lines changed: 26 additions & 38 deletions
@@ -118,7 +118,6 @@ static inline struct cgroup *task_cgroup(struct task_struct *p)
         return cgrp;
 }
 
-
 struct {
         __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
         __uint(map_flags, BPF_F_NO_PREALLOC);
@@ -347,8 +346,6 @@ static inline int update_task_cpumask(struct task_struct *p,
 
         /* --- Point to the correct (cell,L3) DSQ and set vtime baseline --- */
         tctx->dsq = get_cell_l3_dsq_id(tctx->cell, tctx->l3);
-        if (tctx->dsq == DSQ_ERROR)
-                return -EINVAL;
 
         struct cell *cell = lookup_cell(tctx->cell);
         if (!cell)
@@ -606,7 +603,7 @@ void BPF_STRUCT_OPS(mitosis_enqueue, struct task_struct *p, u64 enq_flags)
         if (time_before(vtime, basis_vtime - slice_ns))
                 vtime = basis_vtime - slice_ns;
 
-        scx_bpf_dsq_insert_vtime(p, tctx->dsq, slice_ns, vtime, enq_flags);
+        scx_bpf_dsq_insert_vtime(p, tctx->dsq.raw, slice_ns, vtime, enq_flags);
 
         /* Kick the CPU if needed */
         if (!__COMPAT_is_enq_cpu_selected(enq_flags) && cpu >= 0)
@@ -626,14 +623,10 @@ void BPF_STRUCT_OPS(mitosis_dispatch, s32 cpu, struct task_struct *prev)
         cell = READ_ONCE(cctx->cell);
 
         /* Start from a valid DSQ */
-        u64 local_dsq = get_cpu_dsq_id(cpu);
-        if (!local_dsq) {
-                scx_bpf_error("get_cpu_dsq_id(%d) returned 0", cpu);
-                return;
-        }
+        dsq_id_t local_dsq = get_cpu_dsq_id(cpu);
 
         bool found = false;
-        u64 min_vtime_dsq = local_dsq;
+        dsq_id_t min_vtime_dsq = local_dsq;
         u64 min_vtime = ~0ULL; /* U64_MAX */
         struct task_struct *p;
 
@@ -644,19 +637,17 @@ void BPF_STRUCT_OPS(mitosis_dispatch, s32 cpu, struct task_struct *prev)
 
         /* Check the L3 queue */
         if (l3 != INVALID_L3_ID) {
-                u64 cell_l3_dsq = get_cell_l3_dsq_id(cell, l3);
-                if (cell_l3_dsq != DSQ_ERROR) {
-                        bpf_for_each(scx_dsq, p, cell_l3_dsq, 0) {
-                                min_vtime = p->scx.dsq_vtime;
-                                min_vtime_dsq = cell_l3_dsq;
-                                found = true;
-                                break;
-                        }
+                dsq_id_t cell_l3_dsq = get_cell_l3_dsq_id(cell, l3);
+                bpf_for_each(scx_dsq, p, cell_l3_dsq.raw, 0) {
+                        min_vtime = p->scx.dsq_vtime;
+                        min_vtime_dsq = cell_l3_dsq;
+                        found = true;
+                        break;
                 }
         }
 
         /* Check the CPU DSQ for a lower vtime */
-        bpf_for_each(scx_dsq, p, local_dsq, 0) {
+        bpf_for_each(scx_dsq, p, local_dsq.raw, 0) {
                 if (!found || time_before(p->scx.dsq_vtime, min_vtime)) {
                         min_vtime = p->scx.dsq_vtime;
                         min_vtime_dsq = local_dsq;
@@ -672,7 +663,7 @@ void BPF_STRUCT_OPS(mitosis_dispatch, s32 cpu, struct task_struct *prev)
          */
         // TODO: The upstream side has "&& min_vtime_dsq != dsq" as part of this condition.
         // Do we care?
-        if (!scx_bpf_dsq_move_to_local(min_vtime_dsq)) {
+        if (!scx_bpf_dsq_move_to_local(min_vtime_dsq.raw)) {
 #if MITOSIS_ENABLE_STEALING
                 /* Dead-simple work stealing:
                  * If our local choices are empty, scan sibling (cell,L3) DSQs in the
@@ -684,19 +675,18 @@ void BPF_STRUCT_OPS(mitosis_dispatch, s32 cpu, struct task_struct *prev)
                 // TODO: This math is kinda dumb and confusing.
                 u32 start = ((u32)l3 + 1) % nr_l3;
                 u32 off;
+                // TODO: This might try a bunch of L3s outside of the cell
                 bpf_for (off, 0, nr_l3) {
                         u32 cand = (start + off) % nr_l3;
                         if (cand == (u32)l3)
                                 continue;
-                        u64 src = get_cell_l3_dsq_id(cell, cand);
-                        if (src == DSQ_ERROR)
-                                continue;
+                        dsq_id_t src = get_cell_l3_dsq_id(cell, cand);
 
                         struct task_struct *q;
                         /* Peek only at the head. */
-                        bpf_for_each(scx_dsq, q, src, 0) {
+                        bpf_for_each(scx_dsq, q, src.raw, 0) {
                                 // TODO maybe this should use if (scx_bpf_dsq_move(BPF_FOR_EACH_ITER, q, SCX_DSQ_LOCAL, 0) == 0)
-                                if (scx_bpf_dsq_move_to_local(src)) {
+                                if (scx_bpf_dsq_move_to_local(src.raw)) {
                                         struct task_ctx *qt = lookup_task_ctx(q);
                                         if (qt) {
                                                 qt->steal_count++;
@@ -722,7 +712,7 @@ void BPF_STRUCT_OPS(mitosis_dispatch, s32 cpu, struct task_struct *prev)
                         }
                 if (!moved)
 #endif
-                        scx_bpf_dsq_move_to_local(local_dsq);
+                        scx_bpf_dsq_move_to_local(local_dsq.raw);
         }
 }
 
@@ -1149,7 +1139,7 @@ void BPF_STRUCT_OPS(mitosis_running, struct task_struct *p)
 #endif
 
         /* Validate task's DSQ before it starts running */
-        if (tctx->dsq == 0) {
+        if (tctx->dsq.raw == DSQ_INVALID.raw) {
                 if (tctx->all_cell_cpus_allowed) {
                         scx_bpf_error(
                                 "Task %d has invalid DSQ 0 in running callback (CELL-SCHEDULABLE task, can run on any CPU in cell %d)",
@@ -1437,13 +1427,13 @@ static __always_inline void dump_cell_state(u32 cell_idx)
         }
 }
 
+// TODO: FIX THIS
 static __always_inline void dump_l3_state(){
-
 }
 
 void BPF_STRUCT_OPS(mitosis_dump, struct scx_dump_ctx *dctx)
 {
-        u64 dsq_id;
+        dsq_id_t dsq_id;
         int i;
         struct cell *cell;
         struct cpu_ctx *cpu_ctx;
@@ -1472,7 +1462,7 @@ void BPF_STRUCT_OPS(mitosis_dump, struct scx_dump_ctx *dctx)
                 dsq_id = get_cpu_dsq_id(i);
                 scx_bpf_dump("CPU[%d] cell=%d vtime=%llu nr_queued=%d\n", i,
                              cpu_ctx->cell, READ_ONCE(cpu_ctx->vtime_now),
-                             scx_bpf_dsq_nr_queued(dsq_id));
+                             scx_bpf_dsq_nr_queued(dsq_id.raw));
         }
 
         dump_l3_state();
@@ -1488,9 +1478,9 @@ void BPF_STRUCT_OPS(mitosis_dump_task, struct scx_dump_ctx *dctx,
                 return;
 
         scx_bpf_dump(
-                "Task[%d] vtime=%llu basis_vtime=%llu cell=%u dsq=%x all_cell_cpus_allowed=%d\n",
+                "Task[%d] vtime=%llu basis_vtime=%llu cell=%u dsq=%llu all_cell_cpus_allowed=%d\n",
                 p->pid, p->scx.dsq_vtime, tctx->basis_vtime, tctx->cell,
-                tctx->dsq, tctx->all_cell_cpus_allowed);
+                tctx->dsq.raw, tctx->all_cell_cpus_allowed);
         scx_bpf_dump("Task[%d] CPUS=", p->pid);
         dump_cpumask(p->cpus_ptr);
         scx_bpf_dump("\n");
@@ -1522,7 +1512,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(mitosis_init)
                 if ((u8_ptr = MEMBER_VPTR(all_cpus, [i / 8]))) {
                         if (*u8_ptr & (1 << (i % 8))) {
                                 bpf_cpumask_set_cpu(i, cpumask);
-                                ret = scx_bpf_create_dsq(get_cpu_dsq_id(i), ANY_NUMA);
+                                ret = scx_bpf_create_dsq(get_cpu_dsq_id(i).raw, ANY_NUMA);
                                 if (ret < 0) {
                                         bpf_cpumask_release(cpumask);
                                         return ret;
@@ -1584,12 +1574,10 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(mitosis_init)
                 u32 l3;
                 bpf_for(l3, 0, nr_l3)
                 {
-                        u32 id = get_cell_l3_dsq_id(i, l3);
-                        if (id == DSQ_ERROR)
-                                return -EINVAL;
-                        ret = scx_bpf_create_dsq(id, ANY_NUMA);
+                        dsq_id_t id = get_cell_l3_dsq_id(i, l3);
+                        ret = scx_bpf_create_dsq(id.raw, ANY_NUMA);
                         if (ret < 0)
-                                return ret;
+                                scx_bpf_error("Failed to create DSQ for cell %d, L3 %d: err %d", i, l3, ret);
                 }
         }
 
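
A note on the mechanical part of this change: the scx_bpf_* kfuncs still take a plain u64 DSQ ID, which is why every call site above unwraps the typed value with .raw. If that unwrapping ever feels noisy, thin wrappers like the following would keep the typed dsq_id_t at the call sites. These helpers are a hypothetical sketch, not something the commit adds.

/* Hypothetical convenience wrappers around the raw-u64 kfunc interfaces. */
static inline s32 create_dsq_typed(dsq_id_t id, s32 node)
{
        /* scx_bpf_create_dsq() still expects the raw 64-bit ID. */
        return scx_bpf_create_dsq(id.raw, node);
}

static inline bool move_to_local_typed(dsq_id_t id)
{
        /* Same for scx_bpf_dsq_move_to_local(). */
        return scx_bpf_dsq_move_to_local(id.raw);
}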
