Skip to content

Commit 47055aa

Browse files
committed
Support tensor of pointer as the pointer parameter of the prefetching operation. Add a mask operand for boundary check.
1 parent 0bbb0a5 commit 47055aa

File tree

5 files changed

+434
-71
lines changed

5 files changed

+434
-71
lines changed

test/TritonIntelGPU/ops.mlir

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// RUN: triton-opt %s | FileCheck %s
2+
3+
#dpas = #triton_intel_gpu.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 2, threadsPerWarp = 16, warpsPerCTA = [1, 4], repCluster = [1, 1], A = [8, 16], B = [16, 16], C = [8, 16]}>
4+
module attributes {"ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 16 : i32, "triton_intel_gpu.support_sg_2d_block"} {
5+
tt.func public @prefetch_op(%tensor_of_ptr: tensor<64x32x!tt.ptr<f16>, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>,
6+
%block_ptr: !tt.ptr<tensor<64x32xf16, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>, 1>) {
7+
// COM: Mask of tensor type
8+
// CHECK: triton_intel_gpu.prefetch
9+
%mask_tensor = arith.constant dense<1> : tensor<64x32xi1, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>
10+
triton_intel_gpu.prefetch %tensor_of_ptr, %mask_tensor {boundaryCheck = array<i32>, cache = 1 : i32, evict = 1 : i32, isVolatile = false, operandSegmentSizes = array<i32: 1, 1, 1>, triton_intel_gpu.block_io = "row_major"} : tensor<64x32x!tt.ptr<f16>, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>, tensor<64x32xi1, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>
11+
12+
// COM: Mask of scalar type
13+
// CHECK: triton_intel_gpu.prefetch
14+
%mask_scalar = arith.constant 1 : i1
15+
triton_intel_gpu.prefetch %tensor_of_ptr, %mask_scalar {boundaryCheck = array<i32>, cache = 1 : i32, evict = 1 : i32, isVolatile = false, operandSegmentSizes = array<i32: 1, 1, 1>, triton_intel_gpu.block_io = "row_major"} : tensor<64x32x!tt.ptr<f16>, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>, i1
16+
17+
// COM: Block pointer includes the boundary information. No mask
18+
// CHECK: triton_intel_gpu.prefetch
19+
triton_intel_gpu.prefetch %block_ptr {boundaryCheck = array<i32>, cache = 1 : i32, evict = 1 : i32, isVolatile = false, operandSegmentSizes = array<i32: 1, 1, 1>, triton_intel_gpu.block_io = "row_major"} : !tt.ptr<tensor<64x32xf16, #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>>, 1>
20+
tt.return
21+
}
22+
}

0 commit comments

Comments
 (0)