diff --git a/include/cute/arch/copy_sm100.hpp b/include/cute/arch/copy_sm100.hpp index f8a9a67b1d..40f363054d 100644 --- a/include/cute/arch/copy_sm100.hpp +++ b/include/cute/arch/copy_sm100.hpp @@ -377,7 +377,7 @@ struct SM100_UTCCP_128dp256bit_1cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::1.128x256b [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -396,7 +396,7 @@ struct SM100_UTCCP_128dp256bit_2cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::2.128x256b [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -414,7 +414,7 @@ struct SM100_UTCCP_128dp128bit_1cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::1.128x128b [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -432,7 +432,7 @@ struct SM100_UTCCP_128dp128bit_2cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::2.128x128b [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -452,7 +452,7 @@ struct SM100_UTCCP_4dp256bit_1cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::1.4x256b [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -471,7 +471,7 @@ struct SM100_UTCCP_4dp256bit_2cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::2.4x256b [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -490,7 +490,7 @@ struct SM100_UTCCP_4x32dp128bit_1cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::1.32x128b.warpx4 [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -509,7 +509,7 @@ struct SM100_UTCCP_4x32dp128bit_2cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::2.32x128b.warpx4 [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -528,7 +528,7 @@ struct SM100_UTCCP_2x64dp128bitlw0213_1cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::1.64x128b.warpx2::02_13 [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -547,7 +547,7 @@ struct SM100_UTCCP_2x64dp128bitlw0213_2cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -568,7 +568,7 @@ struct SM100_UTCCP_2x64dp128bitlw0123_1cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::1.64x128b.warpx2::01_23 [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif @@ -589,7 +589,7 @@ struct SM100_UTCCP_2x64dp128bitlw0123_2cta #if defined(CUTE_ARCH_TCGEN05_TMEM_ENABLED) asm volatile ("tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%0], %1;" : - : "r"(dst_addr) "l"(src_addr)); + : "r"(dst_addr), "l"(src_addr)); #else CUTE_INVALID_CONTROL_PATH("Trying to use UTCCP without CUTE_ARCH_TCGEN05_TMEM_ENABLED."); #endif