Skip to content

Commit 835c71e

Browse files
pkwasnie-intelfda0
authored and committed
AlignmentAnalysis: set correct dst alignment for memcpy from
global constant. If the memcpy source is a zero-initialized constant global, the memcpy is equivalent to a memset to zero. In this case the alignment can be set based only on the destination. This change also removes the early run of LLVM's MemCpy Optimization pass when the Khronos SPIR-V translator is used, as there is no longer a need to change memcpys to memsets before AlignmentAnalysis. (cherry picked from commit 10bc149)
1 parent 5639d3f commit 835c71e

File tree

4 files changed

+76
-3
lines changed

4 files changed

+76
-3
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,6 @@ static void CommonOCLBasedPasses(
331331
#ifdef IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR
332332
mpmSPIR.add(new PreprocessSPVIR());
333333
mpmSPIR.add(new PromoteBools());
334-
mpmSPIR.add(llvm::createMemCpyOptPass());
335334
#endif // IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR
336335
mpmSPIR.add(new TypesLegalizationPass());
337336
mpmSPIR.add(new TargetLibraryInfoWrapperPass());

IGC/Compiler/Optimizer/OpenCLPasses/AlignmentAnalysis/AlignmentAnalysis.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,32 @@ void AlignmentAnalysis::SetInstAlignment(MemSetInst& I)
474474

475475
void AlignmentAnalysis::SetInstAlignment(MemCpyInst& I)
476476
{
477+
std::function<bool(Value*)> isConstGlobalZero = [&](Value* V)
478+
{
479+
if (auto* GEP = dyn_cast<GetElementPtrInst>(V))
480+
return isConstGlobalZero(GEP->getPointerOperand());
481+
482+
if (auto* GV = dyn_cast<GlobalVariable>(V))
483+
{
484+
if (!GV->isConstant())
485+
return false;
486+
487+
if (auto* initializer = GV->getInitializer())
488+
return initializer->isZeroValue();
489+
}
490+
491+
return false;
492+
};
493+
494+
// If memcpy source is zeroinitialized constant global, memcpy is equivalent to memset to zero.
495+
// In this case, we can set the alignment of memcpy to the alignment of its destination only.
496+
if (isConstGlobalZero(I.getRawSource()))
497+
{
498+
auto alignment = IGCLLVM::Max(IGCLLVM::getDestAlign(I), IGCLLVM::Align(getAlignValue(I.getRawDest())));
499+
I.setDestAlignment(alignment);
500+
return;
501+
}
502+
477503
// Set the align attribute of the memcpy based on the minimum alignment of its source and dest fields
478504
auto minRawAlignment = iSTD::Min(getAlignValue(I.getRawDest()), getAlignValue(I.getRawSource()));
479505
auto alignment = IGCLLVM::Max(IGCLLVM::getDestAlign(I), IGCLLVM::Align(minRawAlignment));
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --igc-fix-alignment -S < %s 2>&1 | FileCheck %s
10+
11+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
12+
target triple = "spir64-unknown-unknown"
13+
14+
@globalConstantZero = internal unnamed_addr addrspace(2) constant [8 x i8] zeroinitializer
15+
@globalNonconstantZero = internal unnamed_addr addrspace(2) global [8 x i8] zeroinitializer
16+
17+
; memcpy from a zeroinitialized constant global is equivalent to memset to zero.
18+
; In this case, we can set the alignment of memcpy based only on destination pointer.
19+
;
20+
; CHECK-LABEL: @memcpy_from_global_constant_zero(
21+
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 8 %2, i8 addrspace(2)* %3, i64 8, i1 false)
22+
define void @memcpy_from_global_constant_zero() {
23+
entry:
24+
%0 = alloca [5 x [6 x double]], align 8
25+
%1 = getelementptr inbounds [5 x [6 x double]], [5 x [6 x double]]* %0, i64 0, i64 0, i64 0
26+
%2 = bitcast double* %1 to i8*
27+
%3 = getelementptr inbounds [8 x i8], [8 x i8] addrspace(2)* @globalConstantZero, i32 0, i32 0
28+
call void @llvm.memcpy.p0i8.p2i8.i64(i8* %2, i8 addrspace(2)* %3, i64 8, i1 false)
29+
ret void
30+
}
31+
32+
; memcpy from a non-constant global has to be aligned based on both source and destination pointers.
33+
;
34+
; CHECK-LABEL: @memcpy_from_global_nonconstant_zero(
35+
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 1 %2, i8 addrspace(2)* %3, i64 8, i1 false)
36+
define void @memcpy_from_global_nonconstant_zero() {
37+
entry:
38+
%0 = alloca [5 x [6 x double]], align 8
39+
%1 = getelementptr inbounds [5 x [6 x double]], [5 x [6 x double]]* %0, i64 0, i64 0, i64 0
40+
%2 = bitcast double* %1 to i8*
41+
%3 = getelementptr inbounds [8 x i8], [8 x i8] addrspace(2)* @globalNonconstantZero, i32 0, i32 0
42+
call void @llvm.memcpy.p0i8.p2i8.i64(i8* %2, i8 addrspace(2)* %3, i64 8, i1 false)
43+
ret void
44+
}
45+
46+
declare void @llvm.memcpy.p0i8.p2i8.i64(i8* noalias nocapture writeonly, i8 addrspace(2)* noalias nocapture readonly, i64, i1 immarg)

IGC/ocloc_tests/SPIRV-Asm/OpCopyMemorySized_alignment.spvasm

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
; Test if in both cases the AlignmentAnalysis pass assigns the correct alignment to the pointer.
1010

1111
; REQUIRES: regkeys, spirv-as, pvc-supported
12+
; UNSUPPORTED: legacy-translator
1213
; RUN: spirv-as --target-env spv1.0 -o %t.spv %s
1314
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=AlignmentAnalysisPass'" 2>&1 | FileCheck %s
1415

@@ -55,6 +56,7 @@
5556
; CHECK-LABEL: @test(
5657
; CHECK-NOT: call void @llvm.memcpy
5758
; CHECK: [[ALLOCA:%.*]] = alloca %struct.complex
58-
; CHECK: [[PTR:%.*]] = bitcast %struct.complex* [[ALLOCA]] to i8*
59-
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[PTR]], i8 0, i64 16, i1 false)
59+
; CHECK: [[DST:%.*]] = bitcast %struct.complex* [[ALLOCA]] to i8*
60+
; CHECK: [[SRC:%.*]] = getelementptr inbounds [16 x i8], [16 x i8] addrspace(2)* @0, i32 0, i32 0
61+
; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 8 [[DST]], i8 {{.*}}[[SRC]], i64 16, i1 false)
6062
; CHECK: ret void

0 commit comments

Comments
 (0)