Skip to content

Commit 7857caf

Browse files
committed
feat: amd64 tls support
1 parent 06f9bee commit 7857caf

26 files changed

+474
-158
lines changed

lib/linux_amd64/crt1.o

-32 Bytes
Binary file not shown.

runtime/aco/acosw.S

+7-9
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ assist_preempt_yield:
107107
push rbp
108108
mov rbp, rsp
109109
pushfq
110-
sub rsp, 368
110+
sub rsp, 376 // 376 + ret_addr + rbp + pushfq = 400, 16 对齐
111+
111112
nop
112113
mov QWORD PTR [rsp], rax
113114
mov QWORD PTR [rsp + 8], rcx
@@ -179,17 +180,14 @@ assist_preempt_yield:
179180
mov rdx, [rsp + 16]
180181
mov rcx, [rsp + 8]
181182
mov rax, [rsp]
182-
add rsp, 368
183-
popfq // 从栈顶弹出一个值到 RFLAGS 寄存器中,与之前的 pushfq 对应
184183

185-
pop rbp // 从栈顶弹出一个值到rbp寄存器中, 与之前的 push rbp 对应
184+
add rsp, 376
186185

187-
// thread_handle_sig 部分预留了一些栈空间,现在需要还原成与被抢占的函数一致
188-
// 但是此时栈空间还没有被污染,依旧可以从原空间中找到需要返回的 rip
189-
// add rsp, 1032
190-
// jmp [rsp - 1032]
191-
// ret 会跳转到 rbp 向前一个地址部分
186+
popfq // 从栈顶弹出一个值到 RFLAGS 寄存器中,与之前的 pushfq 对应
192187

188+
pop rbp // 从栈顶弹出一个值到rbp寄存器中, 与之前的 push rbp 对应
189+
190+
ret
193191

194192
#elif defined(__ARM64)
195193
// 保存所有通用寄存器

runtime/memory.c

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ int64_t allocated_bytes = 0; // 当前分配的内存空间
99
uint64_t next_gc_bytes = 0; // 下一次 gc 的内存量
1010
bool gc_barrier; // gc 屏障开启标识
1111

12+
_Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint3 = true; // gc 全局 safepoint 标识,通常配合 stw 使用
13+
_Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint4 = false; // gc 全局 safepoint 标识,通常配合 stw 使用
14+
1215
uint8_t gc_stage; // gc 阶段
1316
mutex_t gc_stage_locker;
1417

runtime/memory.h

+4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ extern bool gc_barrier; // gc 屏障开启标识
2222
extern uint8_t gc_stage; // gc 阶段
2323
extern mutex_t gc_stage_locker;
2424

25+
// TODO
26+
extern _Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint3; // gc 全局 safepoint 标识,通常配合 stw 使用
27+
extern _Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint4; // gc 全局 safepoint 标识,通常配合 stw 使用
28+
2529
typedef enum {
2630
GC_STAGE_OFF, // 0 表示 gc 关闭, 这也是一个初始状态
2731
GC_STAGE_START,

runtime/processor.c

+16-11
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ rt_linked_fixalloc_t global_gc_worklist;
2424
uv_key_t tls_processor_key = 0;
2525
uv_key_t tls_coroutine_key = 0;
2626

27+
_Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint2 = false;
2728
_Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint = false;
2829

2930
fixalloc_t coroutine_alloc;
@@ -112,12 +113,12 @@ NO_OPTIMIZE static void thread_handle_sig(int sig, siginfo_t *info, void *uconte
112113
if (fn) {
113114
// 基于当前 rsp scan
114115
uint64_t sp_addr = (uint64_t) rsp;
115-
co->scan_ret_addr = rip;
116-
co->scan_offset = (uint64_t) co->p->share_stack.align_retptr - sp_addr;
116+
// co->scan_ret_addr = rip;
117+
// co->scan_offset = (uint64_t) co->p->share_stack.align_retptr - sp_addr;
117118
} else {
118119
// c 语言段被抢占,采取保守的扫描策略(使用 ret_addr = 0 来识别)
119-
co->scan_ret_addr = 0;
120-
co->scan_offset = (uint64_t) co->p->share_stack.align_retptr - (uint64_t) rsp;
120+
// co->scan_ret_addr = 0;
121+
// co->scan_offset = (uint64_t) co->p->share_stack.align_retptr - (uint64_t) rsp;
121122
}
122123

123124
// 由于被抢占的函数可能会在没有 sub 保留 rsp 的情况下使用 rsp-0x10 这样的空间地址
@@ -196,7 +197,8 @@ static void processor_uv_close(n_processor_t *p) {
196197
int result = uv_loop_close(&p->uv_loop);
197198

198199
if (result != 0) {
199-
DEBUGF("[runtime.processor_uv_close] uv loop close failed, code=%d, msg=%s, p_index=%d", result, uv_strerror(result), p->index);
200+
DEBUGF("[runtime.processor_uv_close] uv loop close failed, code=%d, msg=%s, p_index=%d", result,
201+
uv_strerror(result), p->index);
200202
assert(false && "uv loop close failed");
201203
}
202204

@@ -209,7 +211,8 @@ NO_OPTIMIZE static void coroutine_wrapper() {
209211
n_processor_t *p = processor_get();
210212
assert(p);
211213

212-
DEBUGF("[runtime.coroutine_wrapper] p_index=%d, p_status=%d co=%p, fn=%p main=%d, rt_co=%d", p->index, p->status, co, co->fn, co->main, co->flag & FLAG(CO_FLAG_RTFN));
214+
DEBUGF("[runtime.coroutine_wrapper] p_index=%d, p_status=%d co=%p, fn=%p main=%d, rt_co=%d", p->index, p->status,
215+
co, co->fn, co->main, co->flag & FLAG(CO_FLAG_RTFN));
213216

214217
co_set_status(p, co, CO_STATUS_RUNNING);
215218
processor_set_status(p, P_STATUS_RUNNING);
@@ -438,13 +441,14 @@ static void processor_run(void *raw) {
438441
// TRACEF("[runtime.processor_run] handle, p_index_%d=%d", p->share, p->index);
439442
// - stw
440443
if (p->need_stw > 0) {
441-
STW_WAIT:
442-
TDEBUGF("[runtime.processor_run] need stw, set safe_point=need_stw(%lu), p_index=%d", p->need_stw, p->index);
444+
STW_WAIT:
445+
TDEBUGF("[runtime.processor_run] need stw, set safe_point=need_stw(%lu), p_index=%d", p->need_stw, p->index);
443446
p->in_stw = p->need_stw;
444447

445448
// runtime_gc 线程会解除 safe 状态,所以这里一直等待即可
446449
while (processor_need_stw(p)) {
447-
TRACEF("[runtime.processor_run] p_index=%d, need_stw=%lu, safe_point=%lu stw loop....", p->index, p->need_stw, p->in_stw);
450+
TRACEF("[runtime.processor_run] p_index=%d, need_stw=%lu, safe_point=%lu stw loop....", p->index,
451+
p->need_stw, p->in_stw);
448452
usleep(WAIT_BRIEF_TIME * 1000); // 1ms
449453
}
450454

@@ -465,7 +469,8 @@ static void processor_run(void *raw) {
465469
coroutine_t *co = rt_linked_fixalloc_pop(&p->runnable_list);
466470
assert(co);
467471

468-
RDEBUGF("[runtime.processor_run] will handle coroutine, p_index=%d, co=%p, status=%d", p->index, co, co->status);
472+
RDEBUGF("[runtime.processor_run] will handle coroutine, p_index=%d, co=%p, status=%d", p->index, co,
473+
co->status);
469474

470475
coroutine_resume(p, co);
471476
run_count++;
@@ -500,7 +505,7 @@ static void processor_run(void *raw) {
500505
io_run(p, WAIT_BRIEF_TIME * 5);
501506
}
502507

503-
EXIT:
508+
EXIT:
504509
processor_uv_close(p);
505510
p->thread_id = 0;
506511
processor_set_status(p, P_STATUS_EXIT);

runtime/processor.h

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ extern int64_t coroutine_count;
2424
extern uv_key_t tls_processor_key;
2525
extern uv_key_t tls_coroutine_key;
2626

27+
extern _Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint2; // gc 全局 safepoint 标识,通常配合 stw 使用
2728
extern _Thread_local __attribute__((tls_model("local-exec"))) int64_t tls_yield_safepoint; // gc 全局 safepoint 标识,通常配合 stw 使用
2829

2930
// processor gc_finished 后新产生的 shade ptr 会存入到该全局工作队列中,在 gc_mark_done 阶段进行单线程处理

runtime/runtime.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ int runtime_main(int argc, char *argv[]) __asm("main");
6868
({ \
6969
uint64_t _rbp_value; \
7070
__asm__ volatile("mov %%rbp, %0" : "=r"(_rbp_value)); \
71-
fetch_addr_value(rbp_value + POINTER_SIZE); \
71+
fetch_addr_value(_rbp_value + POINTER_SIZE); \
7272
});
7373
#elif __ARM64
7474
#define CALLER_RET_ADDR(_co) \

src/binary/arch/amd64.h

+69-20
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,17 @@ static inline void amd64_rewrite_rip_symbol(amd64_asm_operand_t *operand) {
162162
operand->value = r;
163163
}
164164

165+
/**
 * Rewrite an operand in place into a segment-offset operand.
 *
 * The operand's type becomes AMD64_ASM_OPERAND_TYPE_SEG_OFFSET and its value
 * is replaced by a newly allocated asm_seg_offset_t that names the "fs"
 * segment with an offset of 0. The operand's size is left unchanged.
 *
 * NOTE(review): the zero offset looks like a placeholder that a later
 * relocation is expected to patch — confirm against the call site.
 *
 * @param operand operand to rewrite; dereferenced unconditionally, so it must
 *                not be NULL. The previous value pointer is overwritten and
 *                is not released here.
 */
static inline void amd64_rewrite_fs_offset_symbol(amd64_asm_operand_t *operand) {
    asm_seg_offset_t *seg = NEW(asm_seg_offset_t);
    seg->name = "fs";
    seg->offset = 0;

    // The original also executed `operand->size = operand->size;`, a no-op
    // self-assignment; it is dropped because the size must simply be preserved.
    operand->type = AMD64_ASM_OPERAND_TYPE_SEG_OFFSET;
    operand->value = seg;
}
174+
175+
165176
static inline amd64_asm_operand_t *extract_symbol_operand(amd64_asm_inst_t *operation) {
166177
for (int i = 0; i < operation->count; ++i) {
167178
amd64_asm_operand_t *operand = operation->operands[i];
@@ -192,11 +203,13 @@ static inline int amd64_gotplt_entry_type(uint64_t relocate_type) {
192203
case R_X86_64_JUMP_SLOT:
193204
case R_X86_64_COPY:
194205
case R_X86_64_RELATIVE:
206+
case R_X86_64_TPOFF32:
207+
case R_X86_64_TPOFF64:
195208
return NO_GOTPLT_ENTRY;
196209

197-
/* The following relocs wouldn't normally need GOT or PLT
198-
slots, but we need them for simplicity in the link
199-
editor part. See our caller for comments. */
210+
/* The following relocs wouldn't normally need GOT or PLT
211+
slots, but we need them for simplicity in the link
212+
editor part. See our caller for comments. */
200213
case R_X86_64_32:
201214
case R_X86_64_32S:
202215
case R_X86_64_64:
@@ -217,9 +230,9 @@ static inline int amd64_gotplt_entry_type(uint64_t relocate_type) {
217230
case R_X86_64_TLSGD:
218231
case R_X86_64_TLSLD:
219232
case R_X86_64_DTPOFF32:
220-
case R_X86_64_TPOFF32:
233+
// case R_X86_64_TPOFF32:
221234
case R_X86_64_DTPOFF64:
222-
case R_X86_64_TPOFF64:
235+
// case R_X86_64_TPOFF64:
223236
case R_X86_64_REX_GOTPCRELX:
224237
case R_X86_64_PLT32:
225238
case R_X86_64_PLTOFF64:
@@ -277,11 +290,12 @@ static inline int8_t amd64_is_code_relocate(uint64_t relocate_type) {
277290
case R_X86_64_TLSGD:
278291
case R_X86_64_TLSLD:
279292
case R_X86_64_DTPOFF32:
280-
case R_X86_64_TPOFF32:
293+
// case R_X86_64_TPOFF32:
281294
case R_X86_64_DTPOFF64:
282295
case R_X86_64_TPOFF64:
283296
return 0;
284297

298+
// case R_X86_64_TPOFF32:
285299
case R_X86_64_PC32:
286300
case R_X86_64_PC64:
287301
case R_X86_64_PLT32:
@@ -311,7 +325,8 @@ elf_amd64_relocate(elf_context_t *ctx, Elf64_Rela *rel, int type, uint8_t *ptr,
311325
case R_X86_64_PLT32:
312326
/* fallthrough: val already holds the PLT slot address */
313327

314-
plt32pc32: {
328+
plt32pc32:
329+
{
315330
// 相对地址计算,
316331
// addr 保存了符号的使用位置(加载到虚拟内存中的位置)
317332
// val 保存了符号的定义的位置(加载到虚拟内存中的位置)
@@ -323,7 +338,8 @@ elf_amd64_relocate(elf_context_t *ctx, Elf64_Rela *rel, int type, uint8_t *ptr,
323338
}
324339
// 小端写入
325340
add32le(ptr, diff);
326-
} break;
341+
}
342+
break;
327343

328344
case R_X86_64_COPY:
329345
break;
@@ -345,7 +361,7 @@ elf_amd64_relocate(elf_context_t *ctx, Elf64_Rela *rel, int type, uint8_t *ptr,
345361
case R_X86_64_GOTPCRELX:
346362
case R_X86_64_REX_GOTPCRELX:
347363
add32le(ptr, ctx->got->sh_addr - addr +
348-
elf_get_sym_attr(ctx, sym_index, 0)->got_offset - 4);
364+
elf_get_sym_attr(ctx, sym_index, 0)->got_offset - 4);
349365
break;
350366
case R_X86_64_GOTPC32:
351367
add32le(ptr, ctx->got->sh_addr - addr + rel->r_addend);
@@ -420,9 +436,11 @@ elf_amd64_relocate(elf_context_t *ctx, Elf64_Rela *rel, int type, uint8_t *ptr,
420436
case R_X86_64_TPOFF32: {
421437
Elf64_Sym *sym = &((Elf64_Sym *) ctx->symtab_section->data)[sym_index];
422438
section_t *s = SEC_TACK(sym->st_shndx);
423-
int32_t x;
424439

425-
x = val - s->sh_addr - s->data_count;
440+
int32_t x = -val;
441+
log_debug("[elf_amd64_relocate] R_X86_64_TPOFF32, val=%ld, s->sh_addr=%ld, s->data_count=%ld, x=%lx", val,
442+
s->sh_addr, s->data_count, (uint32_t)x);
443+
426444
add32le(ptr, x);
427445
break;
428446
}
@@ -517,6 +535,11 @@ static inline void elf_amd64_operation_encodings(elf_context_t *ctx, slice_t *cl
517535
temp->rel_symbol = symbol_operand->name;
518536
}
519537
} else {
538+
int st_type = STT_OBJECT;
539+
if (symbol_operand->is_tls) {
540+
st_type = STT_TLS;
541+
}
542+
520543
// 其他指令(可能是 mov 等,对数据段符号的引用)引用了符号,由于不用考虑指令重写的问题,所以直接写入 0(%rip),让重定位阶段去找该符号进行重定位即可
521544
// 完全不用考虑是标签符号还是数据符号
522545
// 添加到重定位表(.rela.text)
@@ -527,15 +550,21 @@ static inline void elf_amd64_operation_encodings(elf_context_t *ctx, slice_t *cl
527550
Elf64_Sym sym = {
528551
.st_shndx = 0,
529552
.st_size = 0,
530-
.st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC),
553+
.st_info = ELF64_ST_INFO(STB_GLOBAL, st_type),
531554
.st_other = 0,
532555
.st_value = 0,
533556
};
534557
sym_index = elf_put_sym(ctx->symtab_section, ctx->symtab_hash, &sym, symbol_operand->name);
535558
}
536559

537-
// rewrite symbol TODO 可能有其他的重定位方式
538-
amd64_rewrite_rip_symbol(rel_operand);
560+
int reloc_type = 0;
561+
if (symbol_operand->is_tls) {
562+
amd64_rewrite_fs_offset_symbol(rel_operand);
563+
reloc_type = R_X86_64_TPOFF32;
564+
} else {
565+
amd64_rewrite_rip_symbol(rel_operand);
566+
reloc_type = R_X86_64_PC32;
567+
}
539568

540569
// 编码
541570
temp->inst = amd64_asm_inst_encoding(*operation, temp->data, &temp->data_count, c);
@@ -544,12 +573,18 @@ static inline void elf_amd64_operation_encodings(elf_context_t *ctx, slice_t *cl
544573

545574
// 将符号和 sym_index 关联,rel 记录了符号的使用位置, sym_index 记录的符号的信息(包括 linker 完成后的绝对虚拟地址)
546575
// 计算重定位的起点信息
576+
// rip_offset 和 seg_offset 都是最后 4 个字节
547577
uint64_t rel_offset = *temp->offset + rip_offset(temp->data_count, temp->operation);
548-
int64_t addend = (int64_t) (*temp->offset + temp->data_count) - (int64_t) rel_offset;
578+
int64_t addend = 0;
579+
if (symbol_operand->is_tls) {
580+
addend = 0;
581+
} else {
582+
addend = (int64_t) (*temp->offset + temp->data_count) - (int64_t) rel_offset;
583+
}
549584

550585
// addend = 下一条指令的起始位置 - rel_offset
551586
temp->rel = elf_put_relocate(ctx, ctx->symtab_section, ctx->text_section,
552-
rel_offset, R_X86_64_PC32, (int) sym_index, -addend);
587+
rel_offset, reloc_type, (int) sym_index, -addend);
553588

554589
continue;
555590
}
@@ -679,7 +714,9 @@ static void mach_amd64_operation_encodings(mach_context_t *ctx, slice_t *closure
679714
if (!s->is_local) {
680715
n_type |= N_EXT;
681716
}
682-
uint64_t sym_index = mach_put_sym(ctx->symtab_command, &(struct nlist_64){.n_sect = ctx->text_section->sh_index, .n_value = *temp->offset, .n_type = n_type}, s->name);
717+
uint64_t sym_index = mach_put_sym(ctx->symtab_command,
718+
&(struct nlist_64) {.n_sect = ctx->text_section->sh_index, .n_value = *temp->offset, .n_type = n_type},
719+
s->name);
683720
temp->sym_index = sym_index;
684721

685722
assert(s->name);
@@ -722,7 +759,11 @@ static void mach_amd64_operation_encodings(mach_context_t *ctx, slice_t *closure
722759
uint64_t sym_index = (uint64_t) table_get(symtab_hash, symbol_operand->name);
723760
if (sym_index == 0) {
724761
// 可重定位符号注册
725-
sym_index = mach_put_sym(ctx->symtab_command, &(struct nlist_64){.n_sect = NO_SECT, .n_value = 0, .n_type = N_UNDF | N_EXT}, symbol_operand->name);
762+
sym_index = mach_put_sym(ctx->symtab_command, &(struct nlist_64) {
763+
.n_sect = NO_SECT,
764+
.n_value = 0,
765+
.n_type = N_UNDF | N_EXT
766+
}, symbol_operand->name);
726767
}
727768

728769
// rewrite symbol
@@ -738,8 +779,14 @@ static void mach_amd64_operation_encodings(mach_context_t *ctx, slice_t *closure
738779
uint64_t rel_offset = *temp->offset + rip_offset(temp->data_count, temp->operation);
739780
int64_t addend = (int64_t) (*temp->offset + temp->data_count) - (int64_t) rel_offset; // 下一条指令的起始位置
740781

782+
bool is_tls = symbol_operand->is_tls;
783+
int reloc_type = X86_64_RELOC_BRANCH;
784+
if (is_tls) {
785+
reloc_type = X86_64_RELOC_TLV;
786+
}
787+
741788
// addend = 下一条指令的起始位置 - rel_offset, 这是一条 branch 类型的数据
742-
temp->rel = mach_put_relocate(ctx, ctx->text_section, rel_offset, X86_64_RELOC_BRANCH, sym_index);
789+
temp->rel = mach_put_relocate(ctx, ctx->text_section, rel_offset, reloc_type, sym_index);
743790
continue;
744791
}
745792
}
@@ -786,7 +833,9 @@ static void mach_amd64_operation_encodings(mach_context_t *ctx, slice_t *closure
786833
if (sym_index == 0) {
787834
// 如果遍历没有找到符号则会添加一条 UND 符号信息到符号表中
788835
// 10: 0000000000000000 0 FUNC GLOBAL DEFAULT UND string_new
789-
sym_index = mach_put_sym(ctx->symtab_command, &(struct nlist_64){.n_sect = NO_SECT, .n_value = 0, .n_type = N_UNDF | N_EXT}, temp->rel_symbol);
836+
sym_index = mach_put_sym(ctx->symtab_command,
837+
&(struct nlist_64) {.n_sect = NO_SECT, .n_value = 0, .n_type = N_UNDF | N_EXT},
838+
temp->rel_symbol);
790839
}
791840

792841
// sym->st_value 表示符号定义的位置,基于符号所在的 section(.section)

0 commit comments

Comments
 (0)