Skip to content

Commit b783c62

Browse files
zoecarver and huixie90
authored and committed
[Builtin] Add __builtin_clear_padding
Adds `__builtin_clear_padding` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible). I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans as follow up patches. Differential Revision: https://reviews.llvm.org/D87974 overlapping subobjects + opaque pointer union, rename, scalar types
1 parent 7925a9e commit b783c62

File tree

6 files changed

+1178
-0
lines changed

6 files changed

+1178
-0
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,11 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
974974
let Spellings = ["__builtin_is_within_lifetime"];
975975
let Attributes = [NoThrow, CustomTypeChecking, Consteval];
976976
let Prototype = "bool(void*)";
977+
978+
// __builtin_clear_padding(p): zeroes every padding byte of the object *p,
// mirroring the builtin of the same name in GCC/NVCC.
def ClearPadding : LangBuiltin<"CXX_LANG"> {
  let Spellings = ["__builtin_clear_padding"];
  let Attributes = [NoThrow];
  let Prototype = "void(void*)";
}
978983

979984
def GetVtablePointer : LangBuiltin<"CXX_LANG"> {

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
#include "llvm/IR/MatrixBuilder.h"
3636
#include "llvm/Support/ConvertUTF.h"
3737
#include "llvm/Support/ScopedPrinter.h"
38+
#include "llvm/TargetParser/AArch64TargetParser.h"
39+
#include "llvm/TargetParser/X86TargetParser.h"
40+
#include <algorithm>
3841
#include <optional>
3942
#include <utility>
4043

@@ -2592,6 +2595,205 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
25922595
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
25932596
}
25942597

2598+
template <class T>
2599+
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2600+
size_t CurrentStartOffset,
2601+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2602+
bool VisitVirtualBase);
2603+
2604+
template <class T>
2605+
void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2606+
StructType *ST, size_t CurrentStartOffset,
2607+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2608+
bool VisitVirtualBase) {
2609+
llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
2610+
const auto &DL = CGF.CGM.getModule().getDataLayout();
2611+
auto *SL = DL.getStructLayout(ST);
2612+
auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
2613+
if (!R) {
2614+
llvm::dbgs() << "Not a CXXRecordDecl\n";
2615+
return;
2616+
}
2617+
const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
2618+
if (ASTLayout.hasOwnVFPtr()) {
2619+
llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
2620+
<< RunningOffset << " to "
2621+
<< RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
2622+
RunningOffset += DL.getPointerSizeInBits() / 8;
2623+
}
2624+
std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
2625+
Bases.reserve(R->getNumBases());
2626+
// todo get vbases
2627+
for (auto Base : R->bases()) {
2628+
auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
2629+
if (!Base.isVirtual()) {
2630+
auto Offset = static_cast<size_t>(
2631+
ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
2632+
Bases.emplace_back(Offset, Base);
2633+
}
2634+
}
2635+
2636+
auto VisitBases =
2637+
[&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
2638+
std::sort(
2639+
BasesToVisit.begin(), BasesToVisit.end(),
2640+
[](const auto &P1, const auto &P2) { return P1.first < P2.first; });
2641+
for (const auto &Pair : BasesToVisit) {
2642+
// is it OK to use structured binding in clang? what is the language
2643+
// version?
2644+
auto Offset = Pair.first;
2645+
auto Base = Pair.second;
2646+
2647+
llvm::dbgs() << "visiting base at offset " << Offset << '\n';
2648+
// Recursively zero out base classes.
2649+
auto Index = SL->getElementContainingOffset(Offset);
2650+
Value *Idx = CGF.Builder.getSize(Index);
2651+
llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
2652+
Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
2653+
RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
2654+
CurrentStartOffset + Offset,
2655+
RunningOffset, WriteZeroAtOffset, false);
2656+
}
2657+
};
2658+
2659+
VisitBases(Bases);
2660+
2661+
size_t NumFields = std::distance(R->field_begin(), R->field_end());
2662+
std::vector<size_t> FieldOffsets;
2663+
FieldOffsets.reserve(NumFields);
2664+
auto CurrentField = R->field_begin();
2665+
for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
2666+
// Size needs to be in bytes so we can compare it later.
2667+
auto Offset = ASTLayout.getFieldOffset(I) / 8;
2668+
llvm::dbgs() << "visiting field at offset " << Offset << '\n';
2669+
auto Index = SL->getElementContainingOffset(Offset);
2670+
Value *Idx = CGF.Builder.getSize(Index);
2671+
llvm::Type *CurrentFieldType =
2672+
CGF.ConvertTypeForMem(CurrentField->getType());
2673+
Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
2674+
RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
2675+
CurrentStartOffset + Offset, RunningOffset,
2676+
WriteZeroAtOffset, true);
2677+
}
2678+
2679+
if (VisitVirtualBase) {
2680+
2681+
std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
2682+
VBases.reserve(R->getNumVBases());
2683+
for (auto VBase : R->vbases()) {
2684+
auto *BaseRecord =
2685+
cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
2686+
auto Offset = static_cast<size_t>(
2687+
ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
2688+
VBases.emplace_back(Offset, VBase);
2689+
}
2690+
2691+
VisitBases(VBases);
2692+
}
2693+
}
2694+
2695+
template <class T>
2696+
void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
2697+
llvm::Type *Type, ConstantArrayType const *AT,
2698+
size_t CurrentStartOffset, size_t &RunningOffset,
2699+
T &&WriteZeroAtOffset) {
2700+
llvm::dbgs() << "clear padding constant array\n";
2701+
for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
2702+
++ArrIndex) {
2703+
2704+
QualType ElementQualType = AT->getElementType();
2705+
2706+
auto *ElementRecord = ElementQualType->getAsRecordDecl();
2707+
if (!ElementRecord) {
2708+
llvm::dbgs() << "null!\n";
2709+
}
2710+
auto ElementAlign =
2711+
ElementRecord
2712+
? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
2713+
: CGF.getContext().getTypeAlignInChars(ElementQualType);
2714+
2715+
Address FieldElementAddr{Ptr, Type, ElementAlign};
2716+
2717+
auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
2718+
auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
2719+
auto AllocSize =
2720+
CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
2721+
llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
2722+
RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
2723+
CurrentStartOffset +
2724+
ArrIndex * AllocSize.getKnownMinValue(),
2725+
RunningOffset, WriteZeroAtOffset, true);
2726+
}
2727+
}
2728+
2729+
template <class T>
2730+
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2731+
size_t CurrentStartOffset,
2732+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2733+
bool VisitVirtualBase) {
2734+
2735+
llvm::dbgs() << "clear padding before current [" << RunningOffset << ", "
2736+
<< CurrentStartOffset << ")\n";
2737+
for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
2738+
WriteZeroAtOffset(RunningOffset);
2739+
}
2740+
auto *Type = CGF.ConvertTypeForMem(Ty);
2741+
auto Size = CGF.CGM.getModule()
2742+
.getDataLayout()
2743+
.getTypeSizeInBits(Type)
2744+
.getKnownMinValue() /
2745+
8;
2746+
2747+
if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
2748+
ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
2749+
RunningOffset, WriteZeroAtOffset);
2750+
} else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
2751+
ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
2752+
WriteZeroAtOffset, VisitVirtualBase);
2753+
} else if (Ty->isAtomicType()) {
2754+
RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
2755+
CurrentStartOffset, RunningOffset,
2756+
WriteZeroAtOffset, true);
2757+
} else {
2758+
llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
2759+
<< RunningOffset + Size << '\n';
2760+
RunningOffset =
2761+
std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
2762+
}
2763+
}
2764+
2765+
// Entry point for __builtin_clear_padding codegen: stores a zero byte to
// every padding byte of the object of type Ty at Ptr, leaving data bytes
// untouched.
static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
                                    QualType Ty) {
  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
  // Emits a one-byte zero store at the given byte offset from Ptr, with the
  // best alignment deducible from the offset alone.
  auto WriteZeroAtOffset = [&](uint64_t Offset) {
    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
    CGF.Builder.CreateAlignedStore(
        Zero, Element,
        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
  };

  size_t RunningOffset = 0;
  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
                              true);

  // Clear the tail padding between the last data byte and the end of the
  // object's allocation.
  auto Size = CGF.CGM.getModule()
                  .getDataLayout()
                  .getTypeAllocSize(CGF.ConvertTypeForMem(Ty))
                  .getKnownMinValue();
  for (; RunningOffset < Size; ++RunningOffset)
    WriteZeroAtOffset(RunningOffset);
}
2796+
25952797
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
25962798
const CallExpr *E,
25972799
ReturnValueSlot ReturnValue) {
@@ -4839,6 +5041,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
48395041

48405042
return RValue::get(Ptr);
48415043
}
5044+
case Builtin::BI__builtin_clear_padding: {
5045+
const Expr *Op = E->getArg(0);
5046+
Value *Address = EmitScalarExpr(Op);
5047+
auto PointeeTy = Op->getType()->getPointeeType();
5048+
RecursivelyClearPadding(*this, Address, PointeeTy);
5049+
return RValue::get(nullptr);
5050+
}
48425051
case Builtin::BI__sync_fetch_and_add:
48435052
case Builtin::BI__sync_fetch_and_sub:
48445053
case Builtin::BI__sync_fetch_and_or:

clang/lib/Sema/SemaChecking.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2899,7 +2899,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
28992899
return BuiltinIsWithinLifetime(*this, TheCall);
29002900
case Builtin::BI__builtin_trivially_relocate:
29012901
return BuiltinTriviallyRelocate(*this, TheCall);
2902+
case Builtin::BI__builtin_clear_padding: {
2903+
const auto numArgs = TheCall->getNumArgs();
2904+
if (numArgs < 1) {
2905+
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
2906+
<< 0 /*function call*/ << "T*" << 0;
2907+
return ExprError();
2908+
}
2909+
if (numArgs > 1) {
2910+
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
2911+
<< 0 /*function call*/ << "T*" << numArgs << 0;
2912+
return ExprError();
2913+
}
29022914

2915+
const Expr *PtrArg = TheCall->getArg(0);
2916+
const QualType PtrArgType = PtrArg->getType();
2917+
if (!PtrArgType->isPointerType()) {
2918+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
2919+
<< PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
2920+
<< "pointer";
2921+
return ExprError();
2922+
}
2923+
if (PtrArgType->getPointeeType().isConstQualified()) {
2924+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
2925+
<< TheCall->getSourceRange() << 5 /*ConstUnknown*/;
2926+
return ExprError();
2927+
}
2928+
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
2929+
diag::err_typecheck_decl_incomplete_type))
2930+
return ExprError();
2931+
break;
2932+
}
29032933
case Builtin::BI__sync_fetch_and_add:
29042934
case Builtin::BI__sync_fetch_and_add_1:
29052935
case Builtin::BI__sync_fetch_and_add_2:
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
2+
3+
// 4-byte struct with one padding byte after 'a' (to align 'b' to 2) and one
// after 'b' (alignas(4) tail).
struct alignas(4) Foo {
  char a;
  alignas(2) char b;
};
7+
8+
// Same layout as Foo: padding bytes at offsets 1 and 3.
struct alignas(4) Bar {
  char c;
  alignas(2) char d;
};
12+
13+
// Derived struct exercising base-class padding ('a'/'b' in Foo), the struct's
// own padding after 'e', and a padded member ('f').
struct alignas(4) Baz : Foo {
  char e;
  Bar f;
};
17+
18+
// Baz structure:
19+
// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
20+
// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
21+
// %struct.Foo = type { i8, i8, i8, i8 }
22+
// %struct.Bar = type { i8, i8, i8, i8 }
23+
24+
// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
25+
// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
26+
// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
27+
// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
28+
// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
29+
30+
// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
31+
// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
32+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
33+
// CHECK: store i8 0, i8* [[PAD_1]]
34+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
35+
// CHECK: store i8 0, i8* [[PAD_2]]
36+
37+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
38+
// CHECK: store i8 0, i8* [[PAD_3]]
39+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
40+
// CHECK: store i8 0, i8* [[PAD_4]]
41+
// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
42+
// CHECK: store i8 0, i8* [[PAD_5]]
43+
44+
// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
45+
// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
46+
// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
47+
// CHECK: store i8 0, i8* [[PAD_6]]
48+
// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
49+
// CHECK: store i8 0, i8* [[PAD_7]]
50+
// CHECK: ret void
51+
// Clears base (Foo), own, and member (Bar) padding of *baz; see the CHECK
// lines above for the expected stores.
void testBaz(Baz *baz) {
  __builtin_clear_padding(baz);
}
54+
55+
// Struct with a flexible array member: four padding bytes sit between 'size'
// and the 8-byte-aligned 'buf'.
struct UnsizedTail {
  int size;
  alignas(8) char buf[];

  UnsizedTail(int size) : size(size) {}
};
61+
62+
// UnsizedTail structure:
63+
// "size", PAD_1, PAD_2, PAD_3, PAD_4
64+
// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
65+
66+
// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
67+
// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
68+
// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
69+
// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
70+
// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
71+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
72+
// CHECK: store i8 0, i8* [[PAD_1]]
73+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
74+
// CHECK: store i8 0, i8* [[PAD_2]]
75+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
76+
// CHECK: store i8 0, i8* [[PAD_3]]
77+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
78+
// CHECK: store i8 0, i8* [[PAD_4]]
79+
// CHECK: ret void
80+
// Clears the four padding bytes before the flexible array member of *u.
void testUnsizedTail(UnsizedTail *u) {
  __builtin_clear_padding(u);
}
83+
84+
// Array member whose elements each contain internal padding.
struct ArrOfStructsWithPadding {
  Bar bars[2];
};
87+
88+
// ArrOfStructsWithPadding structure:
89+
// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
90+
// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
91+
92+
// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
93+
// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
94+
// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
95+
// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
96+
// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
97+
// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
98+
// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
99+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
100+
// CHECK: store i8 0, i8* [[PAD_1]]
101+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
102+
// CHECK: store i8 0, i8* [[PAD_2]]
103+
// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
104+
// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
105+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
106+
// CHECK: store i8 0, i8* [[PAD_3]]
107+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
108+
// CHECK: store i8 0, i8* [[PAD_4]]
109+
// CHECK: ret void
110+
// Clears the padding inside both Bar elements of *arr.
void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
  __builtin_clear_padding(arr);
}

0 commit comments

Comments
 (0)