Skip to content

Commit 1ee2c7a

Browse files
committed
x86_64: implement vector store
1 parent a3b0c24 commit 1ee2c7a

File tree

2 files changed

+128
-2
lines changed

2 files changed

+128
-2
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 128 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98180,6 +98180,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
98180 98180
.{ ._, ._, .mov, .dst0d, .memsi(.src0d, .@"4", .src1), ._, ._ },
98181 98181
} },
98182 98182
}, .{
98183+
.required_features = .{ .@"64bit", null, null, null },
98183 98184
.dst_constraints = .{ .{ .int = .qword }, .any },
98184 98185
.patterns = &.{
98185 98186
.{ .src = .{ .to_mem, .simm32, .none } },
@@ -120534,7 +120535,133 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
120534 120535
try ert.die(cg);
120535 120536
try res.finish(inst, &.{}, &.{}, cg);
120536 120537
},
120537-
.vector_store_elem => return cg.fail("TODO implement vector_store_elem", .{}),
120538+
.vector_store_elem => {
120539+
const extra = air_datas[@intFromEnum(inst)].vector_store_elem;
120540+
const bin_op = cg.air.extraData(Air.Bin, extra.payload).data;
120541+
var ops = try cg.tempsFromOperands(inst, .{ extra.vector_ptr, bin_op.lhs, bin_op.rhs });
120542+
cg.select(&.{}, &.{}, &ops, comptime &.{ .{
120543+
.src_constraints = .{ .any, .any, .{ .int = .byte } },
120544+
.patterns = &.{
120545+
.{ .src = .{ .to_gpr, .simm32, .imm8 } },
120546+
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
120547+
},
120548+
.each = .{ .once = &.{
120549+
.{ ._, ._, .mov, .leaa(.src0b, .add_src0_elem_size_mul_src1), .src2b, ._, ._ },
120550+
} },
120551+
}, .{
120552+
.src_constraints = .{ .any, .any, .{ .int = .byte } },
120553+
.patterns = &.{
120554+
.{ .src = .{ .to_gpr, .to_gpr, .imm8 } },
120555+
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
120556+
},
120557+
.each = .{ .once = &.{
120558+
.{ ._, ._, .mov, .leai(.src0b, .src1), .src2b, ._, ._ },
120559+
} },
120560+
}, .{
120561+
.src_constraints = .{ .any, .any, .{ .int = .word } },
120562+
.patterns = &.{
120563+
.{ .src = .{ .to_gpr, .simm32, .imm16 } },
120564+
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
120565+
},
120566+
.each = .{ .once = &.{
120567+
.{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2w, ._, ._ },
120568+
} },
120569+
}, .{
120570+
.src_constraints = .{ .any, .any, .{ .int = .word } },
120571+
.patterns = &.{
120572+
.{ .src = .{ .to_gpr, .to_gpr, .imm16 } },
120573+
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
120574+
},
120575+
.each = .{ .once = &.{
120576+
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ },
120577+
} },
120578+
}, .{
120579+
.src_constraints = .{ .any, .any, .{ .int = .dword } },
120580+
.patterns = &.{
120581+
.{ .src = .{ .to_gpr, .simm32, .imm32 } },
120582+
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
120583+
},
120584+
.each = .{ .once = &.{
120585+
.{ ._, ._, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2d, ._, ._ },
120586+
} },
120587+
}, .{
120588+
.src_constraints = .{ .any, .any, .{ .int = .dword } },
120589+
.patterns = &.{
120590+
.{ .src = .{ .to_gpr, .to_gpr, .imm32 } },
120591+
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
120592+
},
120593+
.each = .{ .once = &.{
120594+
.{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ },
120595+
} },
120596+
}, .{
120597+
.required_features = .{ .@"64bit", null, null, null },
120598+
.dst_constraints = .{ .{ .int = .qword }, .any },
120599+
.patterns = &.{
120600+
.{ .src = .{ .to_mem, .simm32, .simm32 } },
120601+
.{ .src = .{ .to_mem, .simm32, .to_gpr } },
120602+
},
120603+
.each = .{ .once = &.{
120604+
.{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ },
120605+
} },
120606+
}, .{
120607+
.required_features = .{ .@"64bit", null, null, null },
120608+
.dst_constraints = .{ .{ .int = .qword }, .any },
120609+
.patterns = &.{
120610+
.{ .src = .{ .to_mem, .to_gpr, .simm32 } },
120611+
.{ .src = .{ .to_mem, .to_gpr, .to_gpr } },
120612+
},
120613+
.each = .{ .once = &.{
120614+
.{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ },
120615+
} },
120616+
} }) catch |err| switch (err) {
120617+
error.SelectFailed => {
120618+
const vector_ty = cg.typeOf(bin_op.lhs);
120619+
const res_ty = vector_ty.elemType2(zcu);
120620+
const elem_size = res_ty.abiSize(zcu);
120621+
while (try ops[0].toBase(false, cg) or
120622+
try ops[1].toRegClass(true, .general_purpose, cg))
120623+
{}
120624+
const base_reg = ops[0].tracking(cg).short.register.to64();
120625+
const rhs_reg = ops[1].tracking(cg).short.register.to64();
120626+
if (!std.math.isPowerOfTwo(elem_size)) {
120627+
try cg.spillEflagsIfOccupied();
120628+
try cg.asmRegisterRegisterImmediate(
120629+
.{ .i_, .mul },
120630+
rhs_reg,
120631+
rhs_reg,
120632+
.u(elem_size),
120633+
);
120634+
try cg.asmRegisterMemory(
120635+
.{ ._, .lea },
120636+
base_reg,
120637+
try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
120638+
);
120639+
} else if (elem_size > 8) {
120640+
try cg.spillEflagsIfOccupied();
120641+
try cg.asmRegisterImmediate(
120642+
.{ ._l, .sh },
120643+
rhs_reg,
120644+
.u(std.math.log2_int(u64, elem_size)),
120645+
);
120646+
try cg.asmRegisterMemory(
120647+
.{ ._, .lea },
120648+
base_reg,
120649+
try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
120650+
);
120651+
} else try cg.asmRegisterMemory(
120652+
.{ ._, .lea },
120653+
base_reg,
120654+
try ops[0].tracking(cg).short.mem(cg, .{
120655+
.index = rhs_reg,
120656+
.scale = .fromFactor(@intCast(elem_size)),
120657+
}),
120658+
);
120659+
try ops[0].store(&ops[1], .{}, cg);
120660+
},
120661+
else => |e| return e,
120662+
};
120663+
for (ops) |op| try op.die(cg);
120664+
},
120538 120665
.c_va_arg => try cg.airVaArg(inst),
120539 120666
.c_va_copy => try cg.airVaCopy(inst),
120540 120667
.c_va_end => try cg.airVaEnd(inst),

test/behavior/vector.zig

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1391,7 +1391,6 @@ test "store packed vector element" {
1391 1391
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
1392 1392
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
1393 1393
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
1394-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
1395 1394
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
1396 1395
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
1397 1396
if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;

0 commit comments

Comments
 (0)