Skip to content

Commit dbe9a51

Browse files
committed
stage2: implement @setAlignStack and 128-bit cmpxchg
* test runner is improved to respect `error.SkipZigTest` * start code is improved to `@setAlignStack(16)` before calling main() * the newly passing behavior test has a workaround for the fact that stage2 cannot yet call `std.Target.x86.featureSetHas()` at comptime. This is blocking on comptime closures. The workaround is that there is a new decl `@import("builtin").stage2_x86_cx16` which is a `bool`. * Implement `@setAlignStack`. This language feature should be re-evaluated at some point - I'll file an issue for it. * LLVM backend: apply/remove the cold attribute and noinline attribute where appropriate. * LLVM backend: loads and stores are properly annotated with alignment and volatile attributes. * LLVM backend: allocas are properly annotated with alignment. * Type: fix integers reporting wrong alignment for 256-bit integers and beyond. Once you get to 16 byte aligned, there is no further alignment for larger integers.
1 parent dc9d76b commit dbe9a51

File tree

12 files changed

+195
-49
lines changed

12 files changed

+195
-49
lines changed

lib/std/special/test_runner.zig

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,16 @@ pub fn log(
123123
}
124124

125125
pub fn main2() anyerror!void {
126+
var bad = false;
126127
// Simpler main(), exercising fewer language features, so that stage2 can handle it.
127128
for (builtin.test_functions) |test_fn| {
128-
try test_fn.func();
129+
test_fn.func() catch |err| {
130+
if (err != error.SkipZigTest) {
131+
bad = true;
132+
}
133+
};
134+
}
135+
if (bad) {
136+
return error.TestsFailed;
129137
}
130138
}

lib/std/start.zig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ fn main2() callconv(.C) c_int {
8787
}
8888

8989
fn _start2() callconv(.Naked) noreturn {
90+
callMain2();
91+
}
92+
93+
fn callMain2() noreturn {
94+
@setAlignStack(16);
9095
root.main();
9196
exit2(0);
9297
}

src/Compilation.zig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3714,6 +3714,8 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: *Allocator) Alloc
37143714
const target = comp.getTarget();
37153715
const generic_arch_name = target.cpu.arch.genericName();
37163716
const use_stage1 = build_options.is_stage1 and comp.bin_file.options.use_stage1;
3717+
const stage2_x86_cx16 = target.cpu.arch == .x86_64 and
3718+
std.Target.x86.featureSetHas(target.cpu.features, .cx16);
37173719

37183720
@setEvalBranchQuota(4000);
37193721
try buffer.writer().print(
@@ -3725,6 +3727,8 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: *Allocator) Alloc
37253727
\\pub const zig_is_stage2 = {};
37263728
\\/// Temporary until self-hosted supports the `cpu.arch` value.
37273729
\\pub const stage2_arch: std.Target.Cpu.Arch = .{};
3730+
\\/// Temporary until self-hosted can call `std.Target.x86.featureSetHas` at comptime.
3731+
\\pub const stage2_x86_cx16 = {};
37283732
\\
37293733
\\pub const output_mode = std.builtin.OutputMode.{};
37303734
\\pub const link_mode = std.builtin.LinkMode.{};
@@ -3740,6 +3744,7 @@ pub fn generateBuiltinZigSource(comp: *Compilation, allocator: *Allocator) Alloc
37403744
build_options.version,
37413745
!use_stage1,
37423746
std.zig.fmtId(@tagName(target.cpu.arch)),
3747+
stage2_x86_cx16,
37433748
std.zig.fmtId(@tagName(comp.bin_file.options.output_mode)),
37443749
std.zig.fmtId(@tagName(comp.bin_file.options.link_mode)),
37453750
comp.bin_file.options.is_test,

src/Module.zig

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,16 @@ import_table: std.StringArrayHashMapUnmanaged(*Scope.File) = .{},
6464
/// The set of all the generic function instantiations. This is used so that when a generic
6565
/// function is called twice with the same comptime parameter arguments, both calls dispatch
6666
/// to the same function.
67+
/// TODO: remove functions from this set when they are destroyed.
6768
monomorphed_funcs: MonomorphedFuncsSet = .{},
68-
6969
/// The set of all comptime function calls that have been cached so that future calls
7070
/// with the same parameters will get the same return value.
7171
memoized_calls: MemoizedCallSet = .{},
72+
/// Contains the values from `@setAlignStack`. A sparse table is used here
73+
/// instead of a field of `Fn` because usage of `@setAlignStack` is rare, while
74+
/// functions are many.
75+
/// TODO: remove functions from this set when they are destroyed.
76+
align_stack_fns: std.AutoHashMapUnmanaged(*const Fn, SetAlignStack) = .{},
7277

7378
/// We optimize memory usage for a compilation with no compile errors by storing the
7479
/// error messages and mapping outside of `Decl`.
@@ -215,6 +220,13 @@ pub const MemoizedCall = struct {
215220
}
216221
};
217222

223+
pub const SetAlignStack = struct {
224+
alignment: u32,
225+
/// TODO: This needs to store a non-lazy source location for the case of an inline function
226+
/// which does `@setAlignStack` (applying it to the caller).
227+
src: LazySrcLoc,
228+
};
229+
218230
/// A `Module` has zero or one of these depending on whether `-femit-h` is enabled.
219231
pub const GlobalEmitH = struct {
220232
/// Where to put the output.
@@ -881,6 +893,7 @@ pub const Fn = struct {
881893

882894
state: Analysis,
883895
is_cold: bool = false,
896+
is_noinline: bool = false,
884897

885898
pub const Analysis = enum {
886899
queued,
@@ -2347,6 +2360,7 @@ pub fn deinit(mod: *Module) void {
23472360

23482361
mod.error_name_list.deinit(gpa);
23492362
mod.test_functions.deinit(gpa);
2363+
mod.align_stack_fns.deinit(gpa);
23502364
mod.monomorphed_funcs.deinit(gpa);
23512365

23522366
{
@@ -3977,6 +3991,12 @@ fn markOutdatedDecl(mod: *Module, decl: *Decl) !void {
39773991
if (mod.failed_decls.fetchSwapRemove(decl)) |kv| {
39783992
kv.value.destroy(mod.gpa);
39793993
}
3994+
if (decl.has_tv and decl.owns_tv) {
3995+
if (decl.val.castTag(.function)) |payload| {
3996+
const func = payload.data;
3997+
_ = mod.align_stack_fns.remove(func);
3998+
}
3999+
}
39804000
if (mod.emit_h) |emit_h| {
39814001
if (emit_h.failed_decls.fetchSwapRemove(decl)) |kv| {
39824002
kv.value.destroy(mod.gpa);

src/Sema.zig

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,7 @@ fn failWithUseOfUndef(sema: *Sema, block: *Scope.Block, src: LazySrcLoc) Compile
833833
/// Appropriate to call when the coercion has already been done by result
834834
/// location semantics. Asserts the value fits in the provided `Int` type.
835835
/// Only supports `Int` types 64 bits or less.
836+
/// TODO don't ever call this since we're migrating towards ResultLoc.coerced_ty.
836837
fn resolveAlreadyCoercedInt(
837838
sema: *Sema,
838839
block: *Scope.Block,
@@ -849,6 +850,23 @@ fn resolveAlreadyCoercedInt(
849850
}
850851
}
851852

853+
fn resolveAlign(
854+
sema: *Sema,
855+
block: *Scope.Block,
856+
src: LazySrcLoc,
857+
zir_ref: Zir.Inst.Ref,
858+
) !u16 {
859+
const alignment_big = try sema.resolveInt(block, src, zir_ref, Type.initTag(.u16));
860+
const alignment = @intCast(u16, alignment_big); // We coerce to u16 in the prev line.
861+
if (alignment == 0) return sema.mod.fail(&block.base, src, "alignment must be >= 1", .{});
862+
if (!std.math.isPowerOfTwo(alignment)) {
863+
return sema.mod.fail(&block.base, src, "alignment value {d} is not a power of two", .{
864+
alignment,
865+
});
866+
}
867+
return alignment;
868+
}
869+
852870
fn resolveInt(
853871
sema: *Sema,
854872
block: *Scope.Block,
@@ -2285,9 +2303,36 @@ fn zirExport(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileErro
22852303
}
22862304

22872305
fn zirSetAlignStack(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!void {
2306+
const mod = sema.mod;
22882307
const inst_data = sema.code.instructions.items(.data)[inst].un_node;
2308+
const operand_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
22892309
const src: LazySrcLoc = inst_data.src();
2290-
return sema.mod.fail(&block.base, src, "TODO: implement Sema.zirSetAlignStack", .{});
2310+
const alignment = try sema.resolveAlign(block, operand_src, inst_data.operand);
2311+
if (alignment > 256) {
2312+
return mod.fail(&block.base, src, "attempt to @setAlignStack({d}); maximum is 256", .{
2313+
alignment,
2314+
});
2315+
}
2316+
const func = sema.owner_func orelse
2317+
return mod.fail(&block.base, src, "@setAlignStack outside function body", .{});
2318+
2319+
switch (func.owner_decl.ty.fnCallingConvention()) {
2320+
.Naked => return mod.fail(&block.base, src, "@setAlignStack in naked function", .{}),
2321+
.Inline => return mod.fail(&block.base, src, "@setAlignStack in inline function", .{}),
2322+
else => {},
2323+
}
2324+
2325+
const gop = try mod.align_stack_fns.getOrPut(mod.gpa, func);
2326+
if (gop.found_existing) {
2327+
const msg = msg: {
2328+
const msg = try mod.errMsg(&block.base, src, "multiple @setAlignStack in the same function body", .{});
2329+
errdefer msg.destroy(mod.gpa);
2330+
try mod.errNote(&block.base, src, msg, "other instance here", .{});
2331+
break :msg msg;
2332+
};
2333+
return mod.failWithOwnedErrorMsg(&block.base, msg);
2334+
}
2335+
gop.value_ptr.* = .{ .alignment = alignment, .src = src };
22912336
}
22922337

22932338
fn zirSetCold(sema: *Sema, block: *Scope.Block, inst: Zir.Inst.Index) CompileError!void {

src/codegen/llvm.zig

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,20 @@ pub const Object = struct {
355355

356356
const llvm_func = try dg.resolveLlvmFunction(decl);
357357

358+
if (module.align_stack_fns.get(func)) |align_info| {
359+
dg.addFnAttrInt(llvm_func, "alignstack", align_info.alignment);
360+
dg.addFnAttr(llvm_func, "noinline");
361+
} else {
362+
DeclGen.removeFnAttr(llvm_func, "alignstack");
363+
if (!func.is_noinline) DeclGen.removeFnAttr(llvm_func, "noinline");
364+
}
365+
366+
if (func.is_cold) {
367+
dg.addFnAttr(llvm_func, "cold");
368+
} else {
369+
DeclGen.removeFnAttr(llvm_func, "cold");
370+
}
371+
358372
// This gets the LLVM values from the function and stores them in `dg.args`.
359373
const fn_param_len = decl.ty.fnParamLen();
360374
var args = try dg.gpa.alloc(*const llvm.Value, fn_param_len);
@@ -512,7 +526,9 @@ pub const DeclGen = struct {
512526
}
513527
}
514528

515-
/// If the llvm function does not exist, create it
529+
/// If the llvm function does not exist, create it.
530+
/// Note that this can be called before the function's semantic analysis has
531+
/// completed, so if any attributes rely on that, they must be done in updateFunc, not here.
516532
fn resolveLlvmFunction(self: *DeclGen, decl: *Module.Decl) !*const llvm.Value {
517533
if (self.llvmModule().getNamedFunction(decl.name)) |llvm_fn| return llvm_fn;
518534

@@ -895,17 +911,39 @@ pub const DeclGen = struct {
895911
}
896912
}
897913

898-
// Helper functions
899-
fn addAttr(self: *DeclGen, val: *const llvm.Value, index: llvm.AttributeIndex, name: []const u8) void {
914+
fn addAttr(dg: *DeclGen, val: *const llvm.Value, index: llvm.AttributeIndex, name: []const u8) void {
915+
return dg.addAttrInt(val, index, name, 0);
916+
}
917+
918+
fn removeAttr(val: *const llvm.Value, index: llvm.AttributeIndex, name: []const u8) void {
900919
const kind_id = llvm.getEnumAttributeKindForName(name.ptr, name.len);
901920
assert(kind_id != 0);
902-
const llvm_attr = self.context.createEnumAttribute(kind_id, 0);
921+
val.removeEnumAttributeAtIndex(index, kind_id);
922+
}
923+
924+
fn addAttrInt(
925+
dg: *DeclGen,
926+
val: *const llvm.Value,
927+
index: llvm.AttributeIndex,
928+
name: []const u8,
929+
int: u64,
930+
) void {
931+
const kind_id = llvm.getEnumAttributeKindForName(name.ptr, name.len);
932+
assert(kind_id != 0);
933+
const llvm_attr = dg.context.createEnumAttribute(kind_id, int);
903934
val.addAttributeAtIndex(index, llvm_attr);
904935
}
905936

906-
fn addFnAttr(self: *DeclGen, val: *const llvm.Value, attr_name: []const u8) void {
907-
// TODO: improve this API, `addAttr(-1, attr_name)`
908-
self.addAttr(val, std.math.maxInt(llvm.AttributeIndex), attr_name);
937+
fn addFnAttr(dg: *DeclGen, val: *const llvm.Value, name: []const u8) void {
938+
dg.addAttr(val, std.math.maxInt(llvm.AttributeIndex), name);
939+
}
940+
941+
fn removeFnAttr(fn_val: *const llvm.Value, name: []const u8) void {
942+
removeAttr(fn_val, std.math.maxInt(llvm.AttributeIndex), name);
943+
}
944+
945+
fn addFnAttrInt(dg: *DeclGen, fn_val: *const llvm.Value, name: []const u8, int: u64) void {
946+
return dg.addAttrInt(fn_val, std.math.maxInt(llvm.AttributeIndex), name, int);
909947
}
910948

911949
/// If the operand type of an atomic operation is not byte sized we need to
@@ -1975,12 +2013,13 @@ pub const FuncGen = struct {
19752013
return null;
19762014
// buildAlloca expects the pointee type, not the pointer type, so assert that
19772015
// a Payload.PointerSimple is passed to the alloc instruction.
1978-
const inst_ty = self.air.typeOfIndex(inst);
1979-
const pointee_type = inst_ty.castPointer().?.data;
1980-
1981-
// TODO: figure out a way to get the name of the var decl.
1982-
// TODO: set alignment and volatile
1983-
return self.buildAlloca(try self.dg.llvmType(pointee_type));
2016+
const ptr_ty = self.air.typeOfIndex(inst);
2017+
const pointee_type = ptr_ty.elemType();
2018+
const pointee_llvm_ty = try self.dg.llvmType(pointee_type);
2019+
const target = self.dg.module.getTarget();
2020+
const alloca_inst = self.buildAlloca(pointee_llvm_ty);
2021+
alloca_inst.setAlignment(ptr_ty.ptrAlignment(target));
2022+
return alloca_inst;
19842023
}
19852024

19862025
/// Use this instead of builder.buildAlloca, because this function makes sure to
@@ -2200,8 +2239,11 @@ pub const FuncGen = struct {
22002239
}
22012240

22022241
fn load(self: *FuncGen, ptr: *const llvm.Value, ptr_ty: Type) *const llvm.Value {
2203-
_ = ptr_ty; // TODO set volatile and alignment on this load properly
2204-
return self.builder.buildLoad(ptr, "");
2242+
const llvm_inst = self.builder.buildLoad(ptr, "");
2243+
const target = self.dg.module.getTarget();
2244+
llvm_inst.setAlignment(ptr_ty.ptrAlignment(target));
2245+
llvm_inst.setVolatile(llvm.Bool.fromBool(ptr_ty.isVolatilePtr()));
2246+
return llvm_inst;
22052247
}
22062248

22072249
fn store(
@@ -2210,8 +2252,11 @@ pub const FuncGen = struct {
22102252
ptr_ty: Type,
22112253
elem: *const llvm.Value,
22122254
) *const llvm.Value {
2213-
_ = ptr_ty; // TODO set volatile and alignment on this store properly
2214-
return self.builder.buildStore(elem, ptr);
2255+
const llvm_inst = self.builder.buildStore(elem, ptr);
2256+
const target = self.dg.module.getTarget();
2257+
llvm_inst.setAlignment(ptr_ty.ptrAlignment(target));
2258+
llvm_inst.setVolatile(llvm.Bool.fromBool(ptr_ty.isVolatilePtr()));
2259+
return llvm_inst;
22152260
}
22162261
};
22172262

src/codegen/llvm/bindings.zig

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ pub const Value = opaque {
8585
pub const addAttributeAtIndex = LLVMAddAttributeAtIndex;
8686
extern fn LLVMAddAttributeAtIndex(*const Value, Idx: AttributeIndex, A: *const Attribute) void;
8787

88+
pub const removeEnumAttributeAtIndex = LLVMRemoveEnumAttributeAtIndex;
89+
extern fn LLVMRemoveEnumAttributeAtIndex(F: *const Value, Idx: AttributeIndex, KindID: c_uint) void;
90+
8891
pub const getFirstBasicBlock = LLVMGetFirstBasicBlock;
8992
extern fn LLVMGetFirstBasicBlock(Fn: *const Value) ?*const BasicBlock;
9093

@@ -136,6 +139,12 @@ pub const Value = opaque {
136139

137140
pub const setOrdering = LLVMSetOrdering;
138141
extern fn LLVMSetOrdering(MemoryAccessInst: *const Value, Ordering: AtomicOrdering) void;
142+
143+
pub const setVolatile = LLVMSetVolatile;
144+
extern fn LLVMSetVolatile(MemoryAccessInst: *const Value, IsVolatile: Bool) void;
145+
146+
pub const setAlignment = LLVMSetAlignment;
147+
extern fn LLVMSetAlignment(V: *const Value, Bytes: c_uint) void;
139148
};
140149

141150
pub const Type = opaque {

src/type.zig

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1583,7 +1583,11 @@ pub const Type = extern union {
15831583

15841584
.int_signed, .int_unsigned => {
15851585
const bits: u16 = self.cast(Payload.Bits).?.data;
1586-
return std.math.ceilPowerOfTwoPromote(u16, (bits + 7) / 8);
1586+
if (bits <= 8) return 1;
1587+
if (bits <= 16) return 2;
1588+
if (bits <= 32) return 4;
1589+
if (bits <= 64) return 8;
1590+
return 16;
15871591
},
15881592

15891593
.optional => {

test/behavior/atomics.zig

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,33 @@ test "cmpxchg with ignored result" {
8181

8282
try expect(5678 == x);
8383
}
84+
85+
test "128-bit cmpxchg" {
86+
try test_u128_cmpxchg();
87+
comptime try test_u128_cmpxchg();
88+
}
89+
90+
fn test_u128_cmpxchg() !void {
91+
if (builtin.zig_is_stage2) {
92+
if (builtin.stage2_arch != .x86_64) return error.SkipZigTest;
93+
if (!builtin.stage2_x86_cx16) return error.SkipZigTest;
94+
} else {
95+
if (builtin.cpu.arch != .x86_64) return error.SkipZigTest;
96+
if (comptime !std.Target.x86.featureSetHas(builtin.cpu.features, .cx16)) return error.SkipZigTest;
97+
}
98+
99+
var x: u128 = 1234;
100+
if (@cmpxchgWeak(u128, &x, 99, 5678, .SeqCst, .SeqCst)) |x1| {
101+
try expect(x1 == 1234);
102+
} else {
103+
@panic("cmpxchg should have failed");
104+
}
105+
106+
while (@cmpxchgWeak(u128, &x, 1234, 5678, .SeqCst, .SeqCst)) |x1| {
107+
try expect(x1 == 1234);
108+
}
109+
try expect(x == 5678);
110+
111+
try expect(@cmpxchgStrong(u128, &x, 5678, 42, .SeqCst, .SeqCst) == null);
112+
try expect(x == 42);
113+
}

0 commit comments

Comments
 (0)