Commit 6ebc5b9

add lit test
1 parent b090692 commit 6ebc5b9

1 file changed: +18 −0 lines changed

test/TritonIntelGPU/blockptr_load.mlir

Lines changed: 18 additions & 0 deletions
@@ -273,6 +273,24 @@ module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32}
 
 // -----
 
+// COM: 2D block load reduced to be <= block size
+// CHECK-DAG: llvm.func spir_funccc @_Z51intel_sub_group_2d_block_read_transform_8b_32r16x2cPU3AS1viiiDv2_iPj(!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
+#dpas = #triton_intel_gpu.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 4, threadsPerWarp = 16, warpsPerCTA = [1, 4], repCluster = [1, 2], A = [8, 32], B = [32, 32], C = [8, 32]}>
+#dot1 = #ttg.dot_op<{opIdx = 1, parent = #dpas, kWidth=4}>
+module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32} {
+  tt.func public @matmul_no_scf_with_advance_kernel(%arg0: !tt.ptr<i8>, %arg1: !tt.ptr<i8>, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg7: i64) {
+    %c0_i32 = arith.constant 0 : i32
+    %c1_i64 = arith.constant 1 : i64
+    %ptrB = tt.make_tensor_ptr %arg1, [%arg4, %arg3], [%arg7, %c1_i64], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : <tensor<64x16xi8, #dot1>>
+    // CHECK-COUNT-1: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transform_8b_32r16x2cPU3AS1viiiDv2_iPj({{.*}}) {{.*}} : (!llvm.ptr<1>{{.*}}, i32, i32, i32, vector<2xi32>, !llvm.ptr{{.*}}) -> ()
+    // CHECK-NOT: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transform_8b_32r16x2cPU3AS1viiiDv2_iPj({{.*}}) {{.*}} : (!llvm.ptr<1>{{.*}}, i32, i32, i32, vector<2xi32>, !llvm.ptr{{.*}}) -> ()
+    %B = tt.load %ptrB {boundaryCheck = array<i32: 0>, padding = 1 : i32, triton_intel_gpu.block_io = "row_major"} : !tt.ptr<tensor<64x16xi8, #dot1>>
+    tt.return
+  }
+}
+
+// -----
+
 #dpas = #triton_intel_gpu.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 2, threadsPerWarp = 16, warpsPerCTA = [1, 1], repCluster = [1, 2], A = [8, 16], B = [16, 32], C = [8, 32]}>
 #dot_b = #ttg.dot_op<{opIdx = 1, parent = #dpas, kWidth = 2}>
 module attributes {"ttg.num-warps" = 1 : i32, "ttg.threads-per-warp" = 16 : i32} {
