Skip to content

Commit 416e122

Browse files
feat: refactor to use a function instead of inlining and give a name to the operation
1 parent bef4953 commit 416e122

File tree

1 file changed

+44
-35
lines changed

1 file changed

+44
-35
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 44 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,42 @@ static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
100100
return builder.createBitcast(resVec, ops[0].getType());
101101
}
102102

103+
// Emits the shuffle sequence for a mask "unpack" builtin.
//
// The first half of the mask vector built from ops[0] and the first half of
// the mask vector built from ops[1] are concatenated, with the ops[1] half
// placed first, and the result is bitcast back to the type of ops[0].
//
// builder       - builder used to create the shuffles and the final bitcast.
// loc           - location attached to every operation created here.
// intrinsicName - name of the operation being lowered.
//                 NOTE(review): this parameter is not referenced anywhere in
//                 the body below; confirm whether it is meant to be used.
// ops           - builtin operands; ops[0] and ops[1] are read, and ops[0]
//                 is cast to cir::IntType, so it is expected to be an integer
//                 whose bit width gives the number of mask elements.
//
// Returns the concatenated mask as a value of the same type as ops[0].
static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
104+
mlir::Location loc,
105+
const std::string &intrinsicName,
106+
SmallVectorImpl<mlir::Value> &ops) {
107+
// Get the number of elements from the bit width of the first operand.
unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
108+
109+
// Convert both operands to mask vectors.
110+
mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
111+
mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);
112+
113+
// Element type used for the shuffle index attributes built below.
mlir::Type i32Ty = builder.getSInt32Ty();
114+
115+
// Create indices for extracting the first half of each vector.
116+
SmallVector<mlir::Attribute, 32> halfIndices;
117+
for (auto i : llvm::seq<unsigned>(0, numElems / 2))
118+
halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
119+
120+
// Extract first half of each vector. This gives better codegen than
121+
// doing it in a single shuffle.
122+
mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
123+
mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
124+
125+
// Create indices for concatenating the vectors.
126+
// NOTE: Operands are swapped to match the intrinsic definition.
127+
// After the half extraction, both vectors have numElems/2 elements.
128+
// In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1] select
129+
// from rhsHalf, and indices [numElems/2..numElems-1] select from lhsHalf.
130+
SmallVector<mlir::Attribute, 64> concatIndices;
131+
for (auto i : llvm::seq<unsigned>(0, numElems))
132+
concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
133+
134+
// Concat the vectors (RHS first, then LHS).
135+
mlir::Value res = builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
136+
// Convert the shuffled vector back to the type of the first operand.
return builder.createBitcast(res, ops[0].getType());
137+
}
138+
103139
static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
104140
mlir::Location loc,
105141
cir::BinOpKind binOpKind,
@@ -244,42 +280,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
244280
getContext().BuiltinInfo.getName(builtinID));
245281
return {};
246282

247-
case X86::BI__builtin_ia32_kunpckdi:
283+
case X86::BI__builtin_ia32_kunpckhi:
284+
return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
285+
"x86.avx512.kunpackb", ops);
248286
case X86::BI__builtin_ia32_kunpcksi:
249-
case X86::BI__builtin_ia32_kunpckhi: {
250-
// Get the number of elements from the bit width of the first operand.
251-
unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
252-
253-
// Convert both operands to mask vectors.
254-
mlir::Value lhs = getMaskVecValue(*this, expr, ops[0], numElems);
255-
mlir::Value rhs = getMaskVecValue(*this, expr, ops[1], numElems);
256-
257-
mlir::Location loc = getLoc(expr->getExprLoc());
258-
259-
// Create indices for extracting the first half of each vector.
260-
SmallVector<mlir::Attribute, 32> halfIndices;
261-
mlir::Type i32Ty = builder.getSInt32Ty();
262-
for (auto i : llvm::seq<unsigned>(0, numElems / 2))
263-
halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
264-
265-
// Extract first half of each vector. This gives better codegen than
266-
// doing it in a single shuffle.
267-
lhs = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
268-
rhs = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
269-
270-
// Create indices for concatenating the vectors.
271-
// NOTE: Operands are swapped to match the intrinsic definition.
272-
// After the half extraction, both vectors have numElems/2 elements.
273-
// In createVecShuffle(rhs, lhs, indices), indices [0..numElems/2-1] select
274-
// from rhs, and indices [numElems/2..numElems-1] select from lhs.
275-
SmallVector<mlir::Attribute, 64> concatIndices;
276-
for (auto i : llvm::seq<unsigned>(0, numElems))
277-
concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
278-
279-
// Concat the vectors (RHS first, then LHS).
280-
mlir::Value res = builder.createVecShuffle(loc, rhs, lhs, concatIndices);
281-
return builder.createBitcast(res, ops[0].getType());
282-
}
287+
return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
288+
"x86.avx512.kunpackw", ops);
289+
case X86::BI__builtin_ia32_kunpckdi:
290+
return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
291+
"x86.avx512.kunpackd", ops);
283292

284293
case X86::BI_mm_setcsr:
285294
case X86::BI__builtin_ia32_ldmxcsr: {

0 commit comments

Comments
 (0)