LLVM/project 458b1e9 — clang/lib/CodeGen CodeGenTBAA.cpp CodeGenTBAA.h, clang/test/CXX/drs cwg158.cpp
[TBAA] Refine pointer-tbaa for void pointers by pointer depth (#126047)

Commit 77d3f8a avoids distinct tags for any pointers where the ultimate pointee type is `void`, to solve breakage in real-world code that uses (indirections to) `void*` for polymorphism over different pointer types.

While this matches the TBAA implementation in GCC, this patch implements a refinement that distinguishes void pointers by pointer depth, as described in the "strict aliasing" documentation included in the aforementioned commit:

> `void*` is permitted to alias any pointer type, `void**` is permitted
> to alias any pointer to pointer type, and so on.

For example, `void**` is no longer considered to alias `int*` in this refinement, but it remains possible to use `void**` for polymorphism over pointers to pointers.
Added | Removed | File |
---|---|---|
+41 | -6 | clang/lib/CodeGen/CodeGenTBAA.cpp |
+14 | -8 | clang/test/CodeGen/tbaa-pointers.c |
+5 | -0 | clang/lib/CodeGen/CodeGenTBAA.h |
+2 | -1 | clang/test/CXX/drs/cwg158.cpp |
+62 | -15 | 4 files |
@@ -80,6 +80,42 @@ llvm::MDNode *CodeGenTBAA::ge | |||
return Char; | return Char; | ||
} | } | ||
+llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { | |||
+ assert(PtrDepth >= 1 && "Pointer must have some depth"); | |||
+ | |||
+ // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes | |||
+ // for "any" pointers of increasing pointer depth, and are organized in the | |||
+ // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ... | |||
+ // | |||
+ // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1), | |||
+ // since there is no node for pointer depth 0. | |||
+ // | |||
+ // These "any" pointer type nodes are used in pointer TBAA. The type node of | |||
+ // a concrete pointer type has the "any" pointer type node of appropriate | |||
+ // pointer depth as its parent. The "any" pointer type nodes are also used | |||
+ // directly for accesses to void pointers, or to specific pointers that we | |||
+ // conservatively do not distinguish in pointer TBAA (e.g. pointers to | |||
+ // members). Essentially, this establishes that e.g. void** can alias with | |||
+ // any type that can unify with T**, ignoring things like qualifiers. Here, T | |||
+ // is a variable that represents an arbitrary type, including pointer types. | |||
+ // As such, each depth is naturally a subtype of the previous depth, and thus | |||
+ // transitively of all previous depths. | |||
+ if (AnyPtrs.size() < PtrDepth) { | |||
+ AnyPtrs.reserve(PtrDepth); | |||
+ auto Size = Module.getDataLayout().getPointerSize(); | |||
+ // Populate first element. | |||
+ if (AnyPtrs.empty()) | |||
+ AnyPtrs.push_back(createScalarType | |||
+ // Populate further elements. | |||
+ for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) { | |||
+ auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer").str(); | |||
+ AnyPtrs.push_back(createScalarType | |||
+ } | |||
+ } | |||
+ | |||
+ return AnyPtrs[PtrDepth - 1]; | |||
+} | |||
+ | |||
static bool TypeHasMayAlias(QualType QTy) { | static bool TypeHasMayAlias(QualType QTy) { | ||
// Tagged types have declarations, and therefore may have attributes. | // Tagged types have declarations, and therefore may have attributes. | ||
if (auto *TD = QTy->getAsTagDecl()) | if (auto *TD = QTy->getAsTagDecl()) | ||
@@ -202,9 +238,8 @@ llvm::MDNode *CodeGenTBAA::ge | |||
// they involve a significant representation difference. We don't | // they involve a significant representation difference. We don't | ||
// currently do so, however. | // currently do so, however. | ||
if (Ty->isPointerType() || Ty->isReferenceType()) { | if (Ty->isPointerType() || Ty->isReferenceType()) { | ||
- llvm::MDNode *AnyPtr = createScalarType | |||
if (!CodeGenOpts.PointerTBAA) | if (!CodeGenOpts.PointerTBAA) | ||
- return | + return getAnyPtr(); | ||
// C++ [basic.lval]p11 permits objects to be accessed through an l-value of | // C++ [basic.lval]p11 permits objects to be accessed through an l-value of | ||
// similar type. Two types are similar under C++ [conv.qual]p2 if the | // similar type. Two types are similar under C++ [conv.qual]p2 if the | ||
// decomposition of the types into pointers, member pointers, and arrays has | // decomposition of the types into pointers, member pointers, and arrays has | ||
@@ -232,7 +267,7 @@ llvm::MDNode *CodeGenTBAA::ge | |||
// common idioms and there is no good alternative to re-write the code | // common idioms and there is no good alternative to re-write the code | ||
// without strict-aliasing violations. | // without strict-aliasing violations. | ||
if (Ty->isVoidType()) | if (Ty->isVoidType()) | ||
- return | + return getAnyPtr(PtrDepth); | ||
assert(!isa<VariableArrayTyp | assert(!isa<VariableArrayTyp | ||
// When the underlying type is a builtin type, we compute the pointee type | // When the underlying type is a builtin type, we compute the pointee type | ||
@@ -256,7 +291,7 @@ llvm::MDNode *CodeGenTBAA::ge | |||
// similar-types rule. | // similar-types rule. | ||
const auto *RT = Ty->getAs<RecordType>(); | const auto *RT = Ty->getAs<RecordType>(); | ||
if (!RT) | if (!RT) | ||
- return | + return getAnyPtr(PtrDepth); | ||
// For unnamed structs or unions C's compatible types rule applies. Two | // For unnamed structs or unions C's compatible types rule applies. Two | ||
// compatible types in different compilation units can have different | // compatible types in different compilation units can have different | ||
@@ -270,7 +305,7 @@ llvm::MDNode *CodeGenTBAA::ge | |||
// compatibility rule, but it doesn't matter because you can never have a | // compatibility rule, but it doesn't matter because you can never have a | ||
// pointer to an anonymous struct or union. | // pointer to an anonymous struct or union. | ||
if (!RT->getDecl()->getDeclName()) | if (!RT->getDecl()->getDeclName()) | ||
- return | + return getAnyPtr(PtrDepth); | ||
// For non-builtin types use the mangled name of the canonical type. | // For non-builtin types use the mangled name of the canonical type. | ||
llvm::raw_svector_ostr | llvm::raw_svector_ostr | ||
@@ -281,7 +316,7 @@ llvm::MDNode *CodeGenTBAA::ge | |||
OutName += std::to_string(PtrDepth); | OutName += std::to_string(PtrDepth); | ||
OutName += " "; | OutName += " "; | ||
OutName += TyName; | OutName += TyName; | ||
- return createScalarTypeNode(OutName, | + return createScalarTypeNode(OutName, getAnyPtr(PtrDepth), Size); | ||
} | } | ||
// Accesses to arrays are accesses to objects of their element types. | // Accesses to arrays are accesses to objects of their element types. |
@@ -139,6 +139,7 @@ class CodeGenTBAA { | |||
llvm::MDNode *Root; | llvm::MDNode *Root; | ||
llvm::MDNode *Char; | llvm::MDNode *Char; | ||
+ llvm::SmallVector<llvm::MDNode *, 4> AnyPtrs; | |||
/// getRoot - This is the mdnode for the root of the metadata type graph | /// getRoot - This is the mdnode for the root of the metadata type graph | ||
/// for this translation unit. | /// for this translation unit. | ||
@@ -148,6 +149,10 @@ class CodeGenTBAA { | |||
/// considered to be equivalent to it. | /// considered to be equivalent to it. | ||
llvm::MDNode *getChar(); | llvm::MDNode *getChar(); | ||
+ /// getAnyPtr - This is the mdnode for any pointer type of (at least) the | |||
+ /// given pointer depth. | |||
+ llvm::MDNode *getAnyPtr(unsigned PtrDepth = 1); | |||
+ | |||
/// CollectFields - Collect information about the fields of a type for | /// CollectFields - Collect information about the fields of a type for | ||
/// !tbaa.struct metadata formation. Return false for an unsupported type. | /// !tbaa.struct metadata formation. Return false for an unsupported type. | ||
bool CollectFields(uint64_t BaseOffset, | bool CollectFields(uint64_t BaseOffset, |
@@ -42,5 +42,6 @@ const int * h(const int * (*p)[10], int *(*q)[9]) { | |||
} | } | ||
// POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0} | // POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0} | ||
-// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ | +// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYP2PTR:!.+]], i64 0} | ||
+// POINTER-TBAA: [[ANYP2PTR]] = !{!"any p2 pointer", [[ANYPTR:!.+]], | |||
// POINTER-TBAA: [[ANYPTR]] = !{!"any pointer" | // POINTER-TBAA: [[ANYPTR]] = !{!"any pointer" |
@@ -208,8 +208,10 @@ int void_ptrs(void **ptr) { | |||
// COMMON-LABEL: define i32 @void_ptrs( | // COMMON-LABEL: define i32 @void_ptrs( | ||
// COMMON-SAME: ptr noundef [[PTRA:%.+]]) | // COMMON-SAME: ptr noundef [[PTRA:%.+]]) | ||
// COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 | // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 | ||
-// | +// DISABLE-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] | ||
-// | +// DEFAULT-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2:!.+]] | ||
+// DISABLE-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] | |||
+// DEFAULT-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2]] | |||
// COMMON-NEXT: [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa [[ANYPTR]] | // COMMON-NEXT: [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa [[ANYPTR]] | ||
// COMMON-NEXT: [[BOOL:%.+]] = icmp ne ptr [[L1]], null | // COMMON-NEXT: [[BOOL:%.+]] = icmp ne ptr [[L1]], null | ||
// COMMON-NEXT: [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64 | // COMMON-NEXT: [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64 | ||
@@ -220,7 +222,8 @@ int void_ptrs(void **ptr) { | |||
} | } | ||
// DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0} | // DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0} | ||
-// DEFAULT: [[P2INT]] = !{!"p2 int", [[ | +// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_P2_POINTER:!.+]], i64 0} | ||
+// DEFAULT: [[ANY_P2_POINTER]] = !{!"any p2 pointer", [[ANY_POINTER:!.+]], i64 0} | |||
// DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0} | // DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0} | ||
// COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0} | // COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0} | ||
// COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0} | // COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0} | ||
@@ -228,17 +231,19 @@ int void_ptrs(void **ptr) { | |||
// DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0} | // DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0} | ||
// DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0} | // DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0} | ||
// DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0} | // DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0} | ||
-// DEFAULT: [[P3INT]] = !{!"p3 int", [[ | +// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_P3_POINTER:!.+]], i64 0} | ||
+// DEFAULT: [[ANY_P3_POINTER]] = !{!"any p3 pointer", [[ANY_P2_POINTER]], i64 0} | |||
// DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0} | // DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0} | ||
-// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ | +// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_P4_POINTER:!.*]], i64 0} | ||
+// DEFAULT: [[ANY_P4_POINTER]] = !{!"any p4 pointer", [[ANY_P3_POINTER]], i64 0} | |||
// DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0} | // DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0} | ||
-// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ | +// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_P3_POINTER]], i64 0} | ||
// DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0} | // DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0} | ||
-// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ | +// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_P2_POINTER]], i64 0} | ||
// DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0} | // DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0} | ||
// DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0} | // DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0} | ||
// DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0} | // DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0} | ||
-// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ | +// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_P2_POINTER]], i64 0} | ||
// DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0} | // DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0} | ||
// DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0} | // DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0} | ||
// DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0} | // DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0} | ||
@@ -251,3 +256,4 @@ int void_ptrs(void **ptr) { | |||
// COMMON: [[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0} | // COMMON: [[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0} | ||
// COMMON: [[INT_TY]] = !{!"int", [[CHAR]], i64 0} | // COMMON: [[INT_TY]] = !{!"int", [[CHAR]], i64 0} | ||
// DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]], [[ANY_POINTER]], i64 0} | // DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]], [[ANY_POINTER]], i64 0} | ||
+// DEFAULT: [[ANYP2]] = !{[[ANY_P2_POINTER]], [[ANY_P2_POINTER]], i64 0} |