Skip to content

Commit 0876553

Browse files
committed
Prevent 2D block loads with dimensions larger than the tensor block size
1 parent 96f96e9 commit 0876553

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

third_party/intel/lib/TritonIntelGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,6 +1628,24 @@ struct LoadOpConversion
16281628
numOperandsInnerDimPerLoad =
16291629
isOperandA ? numOperandsPer2DloadN : numOperandsPer2DLoadM;
16301630

1631+
// Downscale the per-load operand counts if the load size is bigger than the tensor block size (clamped to at least 1).
1632+
LLVM_DEBUG({
1633+
llvm::dbgs() << "numOperandsOuterDimPerLoad before downscaling = "
1634+
<< numOperandsOuterDimPerLoad << "\n";
1635+
llvm::dbgs() << "numOperandsInnerDimPerLoad before downscaling = "
1636+
<< numOperandsInnerDimPerLoad << "\n";
1637+
});
1638+
numOperandsOuterDimPerLoad =
1639+
std::max(std::min(numOperandsOuterDimPerLoad,
1640+
static_cast<unsigned>(tensorShape[dimOuter] /
1641+
elemsPerDPASInst[0])),
1642+
1u);
1643+
numOperandsInnerDimPerLoad =
1644+
std::max(std::min(numOperandsInnerDimPerLoad,
1645+
static_cast<unsigned>(tensorShape[dimInner] /
1646+
elemsPerDPASInst[1])),
1647+
1u);
1648+
16311649
LLVM_DEBUG({
16321650
llvm::dbgs() << "numOperandsOuterDimPerLoad = "
16331651
<< numOperandsOuterDimPerLoad << "\n";

0 commit comments

Comments
 (0)