Skip to content

Commit 24ea0fa

Browse files
committed
Fix failing CI test
Signed-off-by: Tiotto, Ettore <[email protected]>
1 parent ad37157 commit 24ea0fa

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

test/TritonIntelGPU/prefetch-to-llvm.mlir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,16 @@ module attributes {"ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 16 : i32}
156156
tt.return
157157
}
158158
}
159+
160+
// -----
161+
162+
// COM: Currently the prefetch operation in this test cannot be lowered correctly, so we check that the test compiles cleanly and that no 2D block prefetch operation is generated.
163+
#mma = #triton_intel_gpu.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 1, threadsPerWarp = 16, warpsPerCTA = [4, 1], repCluster = [4, 1], A = [32, 8], B = [8, 16], C = [32, 16]}>
164+
module attributes {triton_intel_gpu.min_sg_size = 16 : i32, triton_intel_gpu.support_sg_2d_block, triton_intel_gpu.target_arch = "spir64", "ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.target = "xpu", "ttg.threads-per-warp" = 16 : i32} {
165+
// CHECK-LABEL: llvm.func spir_kernelcc @kernel
166+
tt.func public @kernel(%arg0 : tensor<128x32x!tt.ptr<f32>, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>) {
167+
// CHECK-NOT: intel_sub_group_2d_block_prefetch
168+
triton_intel_gpu.prefetch %arg0 {boundaryCheck = array<i32>, cache = 1 : i32, evict = 1 : i32, isVolatile = false, operandSegmentSizes = array<i32: 1, 0, 0>, triton_intel_gpu.block_io = "row_major"} : tensor<128x32x!tt.ptr<f32>, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>
169+
tt.return
170+
}
171+
}

third_party/intel/lib/TritonIntelGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,10 +329,18 @@ struct PrefetchOpConversion
329329
LogicalResult
330330
matchAndRewrite(triton::gpu::intel::PrefetchOp op, OpAdaptor adaptor,
331331
ConversionPatternRewriter &rewriter) const final {
332-
Value ptr = op.getPtr();
333-
if (isTensorPointerType(ptr.getType()))
334-
return rewriteTensorPointerPrefetch(op, adaptor, rewriter);
335-
return rewriteRegularPointerPrefetch(op, adaptor, rewriter);
332+
LogicalResult res =
333+
isTensorPointerType(op.getPtr().getType())
334+
? rewriteTensorPointerPrefetch(op, adaptor, rewriter)
335+
: rewriteRegularPointerPrefetch(op, adaptor, rewriter);
336+
337+
// FIXME: the prefetch lowering code should never fail. Currently it does in
338+
// some cases. We should address those cases instead of removing the
339+
// prefetch operation.
340+
if (failed(res))
341+
rewriter.eraseOp(op);
342+
343+
return success();
336344
}
337345

338346
LogicalResult
@@ -645,6 +653,11 @@ struct PrefetchOpConversion
645653
masks[offset] = maskElems[i];
646654
}
647655

656+
// baseAddrs[{0, 0}] and baseAddrs[{1, 0}] are currently used to calculate
657+
// the pitch.
658+
if (baseAddrs.count({0, 0}) == 0 || baseAddrs.count({1, 0}) == 0)
659+
return failure();
660+
648661
Value base, baseWidth, baseHeight, rowStrideInBytes, colStride, offsetBaseX,
649662
offsetBaseY;
650663

0 commit comments

Comments
 (0)