@@ -110436,11 +110436,12 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok
110436
110436
if (reg_ok) need_mem: {
110437
110437
if (std.math.isPowerOfTwo(abi_size) and abi_size <= @as(u32, max_abi_size: switch (ty.zigTypeTag(zcu)) {
110438
110438
.float => switch (ty.floatBits(self.target.*)) {
110439
- 16, 32, 64, 128 => 16 ,
110439
+ 16, 32, 64, 128 => if (self.hasFeature(.sse)) 16 else break :need_mem ,
110440
110440
80 => break :need_mem,
110441
110441
else => unreachable,
110442
110442
},
110443
110443
.vector => {
110444
+ if (!self.hasFeature(.sse)) break :need_mem;
110444
110445
const elem_ty = ty.childType(zcu);
110445
110446
break :max_abi_size if (elem_ty.toIntern() == .bool_type)
110446
110447
8
@@ -110467,16 +110468,16 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
110467
110468
const zcu = pt.zcu;
110468
110469
if (self.floatBits(ty)) |float_bits| return switch (float_bits) {
110469
110470
80 => .x87,
110470
- else => . sse,
110471
+ else => if (self.hasFeature(. sse)) .sse else .general_purpose ,
110471
110472
};
110472
110473
if (!ty.isVector(zcu)) return .general_purpose;
110473
110474
const elem_ty = ty.childType(zcu);
110474
110475
return if (elem_ty.toIntern() == .bool_type)
110475
110476
.general_purpose
110476
110477
else if (self.floatBits(elem_ty)) |float_bits|
110477
- if (float_bits == 80) .x87 else . sse
110478
+ if (float_bits == 80) .x87 else if (self.hasFeature(. sse)) .sse else .general_purpose
110478
110479
else if (self.intInfo(elem_ty)) |_|
110479
- . sse
110480
+ if (self.hasFeature(. sse)) .sse else .general_purpose
110480
110481
else
110481
110482
.general_purpose;
110482
110483
}
@@ -123265,6 +123266,33 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
123265
123266
.register => |src_reg| switch (dst_regs[0].class()) {
123266
123267
.general_purpose => switch (src_reg.class()) {
123267
123268
else => unreachable,
123269
+ .x87 => {
123270
+ assert(dst_regs.len == 2);
123271
+ const frame_index = try self.allocFrameIndex(.initType(ty, pt.zcu));
123272
+ if (self.register_manager.isKnownRegFree(.st7)) {
123273
+ try self.asmRegister(.{ .f_, .ld }, src_reg);
123274
+ try self.asmMemory(.{ .f_p, .st }, .{
123275
+ .base = .{ .frame = frame_index },
123276
+ .mod = .{ .rm = .{ .size = .tbyte } },
123277
+ });
123278
+ } else {
123279
+ try self.asmRegister(.{ .f_, .xch }, src_reg);
123280
+ try self.asmMemory(.{ .f_p, .st }, .{
123281
+ .base = .{ .frame = frame_index },
123282
+ .mod = .{ .rm = .{ .size = .tbyte } },
123283
+ });
123284
+ try self.asmMemory(.{ .f_, .ld }, .{
123285
+ .base = .{ .frame = frame_index },
123286
+ .mod = .{ .rm = .{ .size = .tbyte } },
123287
+ });
123288
+ try self.asmRegister(.{ .f_, .xch }, src_reg);
123289
+ }
123290
+ for (dst_regs, 0..) |dst_reg, dst_index| try self.asmRegisterMemory(.{ ._, .mov }, dst_reg.to64(), .{
123291
+ .base = .{ .frame = frame_index },
123292
+ .mod = .{ .rm = .{ .size = .qword, .disp = @intCast(8 * dst_index) } },
123293
+ });
123294
+ return;
123295
+ },
123268
123296
.sse => if (ty.abiSize(pt.zcu) <= 16) {
123269
123297
if (self.hasFeature(.avx)) {
123270
123298
try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128());
@@ -124177,13 +124205,29 @@ fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCVa
124177
124205
try self.asmMemoryRegister(.{ ._ps, .movu }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128());
124178
124206
return;
124179
124207
}
124208
+ } else {
124209
+ if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
124210
+ for (0..2) |limb_index| {
124211
+ try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }));
124212
+ try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }), reg.to64());
124213
+ }
124214
+ return;
124215
+ }
124180
124216
},
124181
124217
32 => if (self.hasFeature(.avx)) {
124182
124218
if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| {
124183
124219
try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to256(), try src_ptr.deref().mem(self, .{ .size = .yword }));
124184
124220
try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .yword }), reg.to256());
124185
124221
return;
124186
124222
}
124223
+ } else {
124224
+ if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| {
124225
+ for (0..4) |limb_index| {
124226
+ try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }));
124227
+ try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword, .disp = @intCast(8 * limb_index) }), reg.to64());
124228
+ }
124229
+ return;
124230
+ }
124187
124231
},
124188
124232
},
124189
124233
};
0 commit comments