@@ -364,20 +364,31 @@ module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32}
364
364
%c1_i64 = arith.constant 1 : i64
365
365
%c0_i32 = arith.constant 0 : i32
366
366
%0 = tt.make_tensor_ptr %arg0 , [%c64_i64 , %c64_i64 ], [%c1_i64 , %col_stride ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <64 x16 xf16 , #blocked >>
367
+ // CHECK: llvm.call spir_funccc @_Z12get_local_idj
367
368
// CHECK-NOT: llvm.icmp "slt"
368
- // CHECK-COUNT-32: llvm.store
369
+ // CHECK: %[[threadID:.*]] = llvm.call spir_funccc @_Z12get_local_idj
370
+ // CHECK: %[[VAL_583:.*]] = llvm.trunc %[[threadID]] : i64 to i32
371
+ // CHECK: %[[VAL_584:.*]] = llvm.mlir.constant(16 : i32) : i32
372
+ // CHECK: %[[VAL_586:.*]] = llvm.udiv %[[VAL_583]], %[[VAL_584]] : i32
373
+ // CHECK: %[[VAL_587:.*]] = llvm.mlir.constant(3 : i32) : i32
374
+ // CHECK: %[[VAL_588:.*]] = llvm.and %[[VAL_586]], %[[VAL_587]] : i32
375
+ // CHECK: %[[threadPred:.*]] = llvm.icmp "eq" %[[VAL_588]], {{.*}} : i32
376
+ // CHECK-COUNT-32: llvm.cond_br %[[threadPred]]
369
377
tt.store %0 , %cst : !tt.ptr <tensor <64 x16 xf16 , #blocked >>
370
378
371
379
// CHECK-COUNT-16: llvm.icmp "slt"
372
- // CHECK-COUNT-32: llvm.store
380
+ // CHECK: %[[threadPred_0:.*]] = llvm.icmp "eq"
381
+ // CHECK-COUNT-32: llvm.and %[[threadPred_0]], {{.*}} : i1
373
382
tt.store %0 , %cst {boundaryCheck = array<i32 : 0 >} : !tt.ptr <tensor <64 x16 xf16 , #blocked >>
374
383
375
384
// CHECK-COUNT-16: llvm.icmp "slt"
376
- // CHECK-COUNT-32: llvm.store
385
+ // CHECK: %[[threadPred_1:.*]] = llvm.icmp "eq"
386
+ // CHECK-COUNT-32: llvm.and %[[threadPred_1]], {{.*}} : i1
377
387
tt.store %0 , %cst {boundaryCheck = array<i32 : 1 >} : !tt.ptr <tensor <64 x16 xf16 , #blocked >>
378
388
379
389
// CHECK-COUNT-32: llvm.icmp "slt"
380
- // CHECK-COUNT-32: llvm.store
390
+ // CHECK: %[[threadPred_2:.*]] = llvm.icmp "eq"
391
+ // CHECK-COUNT-32: llvm.and %[[threadPred_2]], {{.*}} : i1
381
392
tt.store %0 , %cst {boundaryCheck = array<i32 : 0 , 1 >} : !tt.ptr <tensor <64 x16 xf16 , #blocked >>
382
393
383
394
tt.return
0 commit comments