Skip to content

Commit d0a34ef

Browse files
bgajdaINTCsys_zuul
authored andcommitted
Convert LShr+And pattern to EE for later promotion to mov/region.
From: %28 = and i32 %24, 255 %29 = lshr i32 %24, 8 %30 = and i32 %29, 255 %res1 = mul nuw nsw i32 %28, 9798 %res2 = mul nuw nsw i32 %30, 19235 ... shr (16|M0) r32.0<1>:d r16.0<8;8,1>:ud 8:w and (16|M0) r32.0<1>:d r32.0<8;8,1>:d 255:w mul (16|M0) r32.0<1>:d r32.0<8;8,1>:d 19235:w To: %temp = bitcast i32 %24 to <4 x i8> %ee1 = extractelement <4 x i8> %temp, i32 0 %ee2 = extractelement <4 x i8> %temp, i32 1 %28 = zext i8 %ee1 to i32 %30 = zext i8 %ee2 to i32 ... mul (16|M0) r32.0<1>:d r16.1<4;1,0>:ub 19235:w And similar pattern: From: %shl = shl i32 %129, 16 %132 = ashr exact i32 %shl, 16 %133 = ashr i32 %129, 16 To: %temp = bitcast i32 %129 to <2 x i16> %ee1 = extractelement <2 x i16> %temp, i32 0 %ee2 = extractelement <2 x i16> %temp, i32 1 %132 = sext i16 %ee1 to i32 %133 = sext i16 %ee2 to i32 Change-Id: Ia8b09bdd07ee4cc72004539ea88f0a1e27d3e796
1 parent 342359b commit d0a34ef

File tree

2 files changed

+108
-0
lines changed

2 files changed

+108
-0
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2273,7 +2273,114 @@ void GenSpecificPattern::visitBinaryOperator(BinaryOperator& I)
22732273
{
22742274
createBitcastExtractInsertPattern(I, nullptr, I.getOperand(0), 0, 1);
22752275
}
2276+
else
2277+
{
2278+
2279+
Instruction* AndSrc = nullptr;
2280+
ConstantInt* CI;
2281+
2282+
/*
2283+
From:
2284+
%28 = and i32 %24, 255
2285+
%29 = lshr i32 %24, 8
2286+
%30 = and i32 %29, 255
2287+
%31 = lshr i32 %24, 16
2288+
%32 = and i32 %31, 255
2289+
2290+
To:
2291+
%temp = bitcast i32 %24 to <4 x i8>
2292+
%ee1 = extractelement <4 x i8> %temp, i32 0
2293+
%ee2 = extractelement <4 x i8> %temp, i32 1
2294+
%ee3 = extractelement <4 x i8> %temp, i32 2
2295+
%28 = zext i8 %ee1 to i32
2296+
%30 = zext i8 %ee2 to i32
2297+
%32 = zext i8 %ee3 to i32
2298+
2299+
2300+
*/
2301+
auto pattern_And_0xFF = m_And(m_Instruction(AndSrc), m_SpecificInt(0xFF));
2302+
2303+
CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
2304+
bool bytesAllowed = IGC_IS_FLAG_ENABLED(EnableMixInt8Operands) && ctx->platform.supportByteALUOperation();
2305+
2306+
if (bytesAllowed && match(&I, pattern_And_0xFF) && I.getType()->isIntegerTy(32) && AndSrc->getType()->isIntegerTy(32))
2307+
{
2308+
Instruction* LhsSrc = nullptr;
2309+
2310+
auto LShr_Pattern = m_LShr(m_Instruction(LhsSrc), m_ConstantInt(CI));
2311+
bool LShrMatch = match(AndSrc, LShr_Pattern) && LhsSrc->getType()->isIntegerTy(32) && (CI->getZExtValue() % 8 == 0);
2312+
2313+
// in case there's no shr, it will be 0
2314+
uint32_t newIndex = 0;
2315+
2316+
if (LShrMatch) // extract inner
2317+
{
2318+
AndSrc = LhsSrc;
2319+
newIndex = (uint32_t)CI->getZExtValue() / 8;
2320+
}
2321+
2322+
llvm::IRBuilder<> builder(&I);
2323+
VectorType* vec4 = VectorType::get(builder.getInt8Ty(), 4);
2324+
Value* BC = builder.CreateBitCast(AndSrc, vec4);
2325+
Value* EE = builder.CreateExtractElement(BC, builder.getInt32(newIndex));
2326+
Value* Zext = builder.CreateZExt(EE, builder.getInt32Ty());
2327+
I.replaceAllUsesWith(Zext);
2328+
I.eraseFromParent();
2329+
2330+
}
2331+
2332+
}
22762333
}
2334+
else if (I.getOpcode() == Instruction::AShr)
2335+
{
2336+
/*
2337+
From:
2338+
%129 = i32...
2339+
%Temp = shl i32 %129, 16
2340+
%132 = ashr exact i32 %Temp, 16
2341+
%133 = ashr i32 %129, 16
2342+
2343+
To:
2344+
%129 = i32...
2345+
%temp = bitcast i32 %129 to <2 x i16>
2346+
%ee1 = extractelement <2 x i16> %temp, i32 0
2347+
%ee2 = extractelement <2 x i16> %temp, i32 1
2348+
%132 = sext i16 %ee1 to i32
2349+
%133 = sext i16 %ee2 to i32
2350+
Which will end up as regioning instead of 2 instructions.
2351+
*/
2352+
using namespace llvm::PatternMatch;
2353+
2354+
Instruction* AShrSrc = nullptr;
2355+
auto pattern_1 = m_AShr(m_Instruction(AShrSrc), m_SpecificInt(16));
2356+
2357+
if (match(&I, pattern_1) && I.getType()->isIntegerTy(32) && AShrSrc->getType()->isIntegerTy(32))
2358+
{
2359+
Instruction* ShlSrc = nullptr;
2360+
2361+
auto Shl_Pattern = m_Shl(m_Instruction(ShlSrc), m_SpecificInt(16));
2362+
bool submatch = match(AShrSrc, Shl_Pattern) && ShlSrc->getType()->isIntegerTy(32);
2363+
2364+
// in case there's no shl, we take upper half
2365+
uint32_t newIndex = 1;
2366+
2367+
// if there was Shl, we take lower half
2368+
if (submatch)
2369+
{
2370+
AShrSrc = ShlSrc;
2371+
newIndex = 0;
2372+
}
2373+
2374+
llvm::IRBuilder<> builder(&I);
2375+
VectorType* vec2 = VectorType::get(builder.getInt16Ty(), 2);
2376+
Value* BC = builder.CreateBitCast(AShrSrc, vec2);
2377+
Value* EE = builder.CreateExtractElement(BC, builder.getInt32(newIndex));
2378+
Value* Sext = builder.CreateSExt(EE, builder.getInt32Ty());
2379+
I.replaceAllUsesWith(Sext);
2380+
I.eraseFromParent();
2381+
}
2382+
}
2383+
22772384
}
22782385

22792386
void GenSpecificPattern::visitCmpInst(CmpInst& I)

IGC/common/igc_flags.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ DECLARE_IGC_REGKEY(bool, cl_khr_srgb_image_writes, false, "Enable cl_khr_sr
380380
DECLARE_IGC_REGKEY(bool, MSAA16BitPayloadEnable, true, "Enable support for MSAA 16 bit payload , a hardware DCN supporting this from ICL+ to improve perf on MSAA workloads", false)
381381
DECLARE_IGC_REGKEY(bool, EnableInsertElementScalarCoalescing, false, "Enable coalescing on the scalar operand of insertelement", false)
382382
DECLARE_IGC_REGKEY(bool, EnableMixIntOperands, true, "Enable generating mix-sized operands for int ALU", false)
383+
DECLARE_IGC_REGKEY(bool, EnableMixInt8Operands, true, "Enable generating bytes in mix-sized operands for int ALU", false)
383384
DECLARE_IGC_REGKEY(bool, PixelShaderDoNotAbortOnSpill, false, "Do not abort on a spill", false)
384385
DECLARE_IGC_REGKEY(DWORD, ForcePixelShaderSIMDMode, 0, "Setting it to values def in igc.h will force SIMD mode compilation for pixel shaders. Note that only SIMD8 is compiled unless other ForcePixelShaderSIMD* are also selected", false)
385386
DECLARE_IGC_REGKEY(DWORD, StagedCompilationExperiments, 0, "Experiment with staged compilation when != 0", false)

0 commit comments

Comments
 (0)