Skip to content

Commit 38b7dc0

Browse files
committed
x86_64: support more non-sse movement options
Also improve abi compatibility. Closes #23426
1 parent 96e35b3 commit 38b7dc0

File tree

2 files changed

+92
-22
lines changed

2 files changed

+92
-22
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110436,11 +110436,12 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok
110436110436
if (reg_ok) need_mem: {
110437110437
if (std.math.isPowerOfTwo(abi_size) and abi_size <= @as(u32, max_abi_size: switch (ty.zigTypeTag(zcu)) {
110438110438
.float => switch (ty.floatBits(self.target.*)) {
110439-
16, 32, 64, 128 => 16,
110439+
16, 32, 64, 128 => if (self.hasFeature(.sse)) 16 else break :need_mem,
110440110440
80 => break :need_mem,
110441110441
else => unreachable,
110442110442
},
110443110443
.vector => {
110444+
if (!self.hasFeature(.sse)) break :need_mem;
110444110445
const elem_ty = ty.childType(zcu);
110445110446
break :max_abi_size if (elem_ty.toIntern() == .bool_type)
110446110447
8
@@ -110467,16 +110468,16 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
110467110468
const zcu = pt.zcu;
110468110469
if (self.floatBits(ty)) |float_bits| return switch (float_bits) {
110469110470
80 => .x87,
110470-
else => .sse,
110471+
else => if (self.hasFeature(.sse)) .sse else .general_purpose,
110471110472
};
110472110473
if (!ty.isVector(zcu)) return .general_purpose;
110473110474
const elem_ty = ty.childType(zcu);
110474110475
return if (elem_ty.toIntern() == .bool_type)
110475110476
.general_purpose
110476110477
else if (self.floatBits(elem_ty)) |float_bits|
110477-
if (float_bits == 80) .x87 else .sse
110478+
if (float_bits == 80) .x87 else if (self.hasFeature(.sse)) .sse else .general_purpose
110478110479
else if (self.intInfo(elem_ty)) |_|
110479-
.sse
110480+
if (self.hasFeature(.sse)) .sse else .general_purpose
110480110481
else
110481110482
.general_purpose;
110482110483
}
@@ -123265,6 +123266,33 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
123265123266
.register => |src_reg| switch (dst_regs[0].class()) {
123266123267
.general_purpose => switch (src_reg.class()) {
123267123268
else => unreachable,
123269+
.x87 => {
123270+
assert(dst_regs.len == 2);
123271+
const frame_index = try self.allocFrameIndex(.initType(ty, pt.zcu));
123272+
if (self.register_manager.isKnownRegFree(.st7)) {
123273+
try self.asmRegister(.{ .f_, .ld }, src_reg);
123274+
try self.asmMemory(.{ .f_p, .st }, .{
123275+
.base = .{ .frame = frame_index },
123276+
.mod = .{ .rm = .{ .size = .tbyte } },
123277+
});
123278+
} else {
123279+
try self.asmRegister(.{ .f_, .xch }, src_reg);
123280+
try self.asmMemory(.{ .f_p, .st }, .{
123281+
.base = .{ .frame = frame_index },
123282+
.mod = .{ .rm = .{ .size = .tbyte } },
123283+
});
123284+
try self.asmMemory(.{ .f_, .ld }, .{
123285+
.base = .{ .frame = frame_index },
123286+
.mod = .{ .rm = .{ .size = .tbyte } },
123287+
});
123288+
try self.asmRegister(.{ .f_, .xch }, src_reg);
123289+
}
123290+
for (dst_regs, 0..) |dst_reg, dst_index| try self.asmRegisterMemory(.{ ._, .mov }, dst_reg.to64(), .{
123291+
.base = .{ .frame = frame_index },
123292+
.mod = .{ .rm = .{ .size = .qword, .disp = @intCast(8 * dst_index) } },
123293+
});
123294+
return;
123295+
},
123268123296
.sse => if (ty.abiSize(pt.zcu) <= 16) {
123269123297
if (self.hasFeature(.avx)) {
123270123298
try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128());
@@ -124177,13 +124205,29 @@ fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCVa
124177124205
try self.asmMemoryRegister(.{ ._ps, .movu }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
124178124206
return;
124179124207
}
124208+
} else {
124209+
if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
124210+
for (0..2) |limb_index| {
124211+
try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }));
124212+
try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }), reg.to64());
124213+
}
124214+
return;
124215+
}
124180124216
},
124181124217
32 => if (self.hasFeature(.avx)) {
124182124218
if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
124183124219
try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to256(), try src_ptr.deref().mem(self, .{ .size = .yword }));
124184124220
try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .yword }), reg.to256());
124185124221
return;
124186124222
}
124223+
} else {
124224+
if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
124225+
for (0..4) |limb_index| {
124226+
try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }));
124227+
try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }), reg.to64());
124228+
}
124229+
return;
124230+
}
124187124231
},
124188124232
},
124189124233
};

src/arch/x86_64/abi.zig

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -148,38 +148,64 @@ pub fn classifySystemV(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Cont
148148
result[0] = .integer;
149149
return result;
150150
},
151-
.float => switch (ty.floatBits(target.*)) {
151+
.float => if (std.Target.x86.featureSetHas(target.cpu.features, .soft_float)) {
152+
@memset(result[0 .. std.math.divCeil(u16, ty.floatBits(target.*), 64) catch unreachable], .integer);
153+
return result;
154+
} else switch (ty.floatBits(target.*)) {
152155
16 => {
153-
if (ctx == .field) {
154-
result[0] = .memory;
155-
} else {
156+
result[0] = if (ctx == .field)
157+
.memory
158+
else if (std.Target.x86.featureSetHasAll(target.cpu.features, .{ .sse, .sse2 }))
156159
// TODO clang doesn't allow __fp16 as .ret or .arg
157-
result[0] = .sse;
158-
}
160+
.sse
161+
else
162+
.integer;
159163
return result;
160164
},
161165
32 => {
162-
result[0] = .float;
166+
if (std.Target.x86.featureSetHas(target.cpu.features, .sse)) {
167+
result[0] = .float;
168+
} else if (std.Target.x86.featureSetHas(target.cpu.features, .x87)) {
169+
result[0] = .x87;
170+
result[1] = .x87up;
171+
} else {
172+
@memset(result[0..2], .integer);
173+
}
163174
return result;
164175
},
165176
64 => {
166-
result[0] = .sse;
177+
if (std.Target.x86.featureSetHas(target.cpu.features, .sse2)) {
178+
result[0] = .sse;
179+
} else if (std.Target.x86.featureSetHas(target.cpu.features, .x87)) {
180+
result[0] = .x87;
181+
result[1] = .x87up;
182+
} else {
183+
@memset(result[0..2], .integer);
184+
}
167185
return result;
168186
},
169187
128 => {
170-
// "Arguments of types __float128, _Decimal128 and __m128 are
171-
// split into two halves. The least significant ones belong
172-
// to class SSE, the most significant one to class SSEUP."
173-
result[0] = .sse;
174-
result[1] = .sseup;
188+
if (std.Target.x86.featureSetHas(target.cpu.features, .sse)) {
189+
// "Arguments of types __float128, _Decimal128 and __m128 are
190+
// split into two halves. The least significant ones belong
191+
// to class SSE, the most significant one to class SSEUP."
192+
result[0] = .sse;
193+
result[1] = .sseup;
194+
} else {
195+
@memset(result[0..2], .integer);
196+
}
175197
return result;
176198
},
177199
80 => {
178-
// "The 64-bit mantissa of arguments of type long double
179-
// belongs to classX87, the 16-bit exponent plus 6 bytes
180-
// of padding belongs to class X87UP."
181-
result[0] = .x87;
182-
result[1] = .x87up;
200+
if (std.Target.x86.featureSetHas(target.cpu.features, .x87)) {
201+
// "The 64-bit mantissa of arguments of type long double
202+
// belongs to class X87, the 16-bit exponent plus 6 bytes
203+
// of padding belongs to class X87UP."
204+
result[0] = .x87;
205+
result[1] = .x87up;
206+
} else {
207+
@memset(result[0..2], .integer);
208+
}
183209
return result;
184210
},
185211
else => unreachable,

0 commit comments

Comments
 (0)