Skip to content

Commit 86163a7

Browse files
committed
add missed kernels
1 parent (433324c); commit 86163a7

File tree

1 file changed

+16 / -0 lines changed
  • src/cuda/GPU_Microbenchmark/ubench/core/lat_gmma

1 file changed

+16 / -0 lines changed

src/cuda/GPU_Microbenchmark/ubench/core/lat_gmma/lat_gmma.h

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,22 @@ void run_f32accumulator_tests() {
264264
run_f32tf32tf32tf32_64x128x8_test();
265265
run_f32tf32tf32tf32_64x192x8_test();
266266
run_f32tf32tf32tf32_64x256x8_test();
267+
run_f32f16f16_64x8x16_test();
268+
run_f32f16f16_64x16x16_test();
269+
run_f32f16f16_64x32x16_test();
270+
run_f32f16f16_64x64x16_test();
271+
run_f32f16f16_64x96x16_test();
272+
run_f32f16f16_64x128x16_test();
273+
run_f32f16f16_64x192x16_test();
274+
run_f32f16f16_64x256x16_test();
275+
run_f32bf16bf16_64x8x16_test();
276+
run_f32bf16bf16_64x16x16_test();
277+
run_f32bf16bf16_64x32x16_test();
278+
run_f32bf16bf16_64x64x16_test();
279+
run_f32bf16bf16_64x96x16_test();
280+
run_f32bf16bf16_64x128x16_test();
281+
run_f32bf16bf16_64x192x16_test();
282+
run_f32bf16bf16_64x256x16_test();
267283
run_f32e4m3e4m3e4m3_64x8x32_test();
268284
run_f32e4m3e4m3e4m3_64x16x32_test();
269285
run_f32e4m3e4m3e4m3_64x32x32_test();

0 commit comments

Comments
 (0)