-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Open
Labels
Description
Which component has the problem?
CUTLASS C++
Bug Report
Describe the bug
Compilation fails when ConvKernel is instantiated with StreamKScheduler.
Steps/Code to reproduce bug
Modify examples/76_blackwell_conv/76_blackwell_conv_fprop.cu by passing cutlass::gemm::StreamKScheduler as the tile scheduler tag of ConvKernel, as shown below.
using ConvKernel = cutlass::conv::kernel::ConvUniversal<
ProblemShape,
CollectiveMainloop,
CollectiveEpilogue,
cutlass::gemm::StreamKScheduler
>;
Building the example then produces the following compile errors:
Building CUDA object examples/76_blackwell_conv/CMakeFiles/76_blackwell_conv_fprop.dir/76_blackwell_conv_fprop.cu.o
/home/jizhan/cutlass/include/cute/algorithm/tuple_algorithms.hpp(494): error: no instance of overloaded function "cute::get" matches the argument list
argument types are: (const ProblemShape)
return cute::make_tuple(get<I>(t)...);
^
/home/jizhan/cutlass/include/cute/container/tuple.hpp(245): note #3327-D: candidate function template "cute::get<I,T...>(cute::tuple<T...> &&) noexcept" failed deduction
get(tuple<T...>&& t) noexcept
^
/home/jizhan/cutlass/include/cute/container/tuple.hpp(236): note #3327-D: candidate function template "cute::get<I,T...>(cute::tuple<T...> &) noexcept" failed deduction
get(tuple<T...>& t) noexcept
^
/home/jizhan/cutlass/include/cute/container/tuple.hpp(227): note #3327-D: candidate function template "cute::get<I,T...>(const cute::tuple<T...> &) noexcept" failed deduction
get(tuple<T...> const& t) noexcept
^
/home/jizhan/cutlass/include/cute/container/type_list.hpp(48): note #3327-D: candidate function template "cute::get<I,T...>(const cute::type_list<T...> &) noexcept" failed deduction
get(type_list<T...> const&) noexcept {
^
/home/jizhan/cutlass/include/cute/container/cuda_types.hpp(157): note #3326-D: function template "cute::get<I>(cute::uint3 &&)" does not match because argument #1 does not match parameter
uint32_t&& get(uint3&& a)
^
/home/jizhan/cutlass/include/cute/container/cuda_types.hpp(141): note #3326-D: function template "cute::get<I>(const cute::uint3 &)" does not match because argument #1 does not match parameter
uint32_t const& get(uint3 const& a)
^
/home/jizhan/cutlass/include/cute/container/cuda_types.hpp(125): note #3326-D: function template "cute::get<I>(cute::uint3 &)" does not match because argument #1 does not match parameter
uint32_t& get(uint3& a)
^
/home/jizhan/cutlass/include/cute/container/cuda_types.hpp(91): note #3326-D: function template "cute::get<I>(cute::dim3 &&)" does not match because argument #1 does not match parameter
uint32_t&& get(dim3&& a)
^
/home/jizhan/cutlass/include/cute/container/cuda_types.hpp(72): note #3326-D: function template "cute::get<I>(const cute::dim3 &)" does not match because argument #1 does not match parameter
uint32_t const& get(dim3 const& a)
^
/home/jizhan/cutlass/include/cute/container/cuda_types.hpp(53): note #3326-D: function template "cute::get<I>(cute::dim3 &)" does not match because argument #1 does not match parameter
uint32_t& get(dim3& a)
^
/home/jizhan/cutlass/include/cute/numeric/integer_sequence.hpp(171): note #3327-D: candidate function template "cute::get<I,T,Ints...>(std::integer_sequence<T, Ints...>)" failed deduction
get(integer_sequence<T, Ints...>) {
^
/home/jizhan/cutlass/include/cute/container/array.hpp(422): note #3327-D: candidate function template "cute::get<I,T,N>(cute::array<T, N> &&)" failed deduction
T&& get(array<T,N>&& a)
^
/home/jizhan/cutlass/include/cute/container/array.hpp(414): note #3327-D: candidate function template "cute::get<I,T,N>(const cute::array<T, N> &)" failed deduction
T const& get(array<T,N> const& a)
^
/home/jizhan/cutlass/include/cute/container/array.hpp(406): note #3327-D: candidate function template "cute::get<I,T,N>(cute::array<T, N> &)" failed deduction
T& get(array<T,N>& a)
^
detected during:
instantiation of "auto cute::select<I...,T>(const T &) [with I=<0, 1, 3>, T=ProblemShape]" at line 596 of /home/jizhan/cutlass/include/cutlass/gemm/kernel/sm100_tile_scheduler.hpp
instantiation of "dim3 cutlass::gemm::kernel::detail::PersistentTileSchedulerSm100<ClusterShape_, Stages_>::get_tiled_cta_shape_mnl(ProblemShapeMNKL, TileShape, AtomThrShape, ClusterShape) [with ClusterShape_=cute::tuple<cute::_1, cute::_1, cute::_1>, Stages_=1U, ProblemShapeMNKL=ProblemShape, TileShape=cute::tuple<cute::_128, cute::_128, cute::tuple<cute::_64>>, AtomThrShape=cute::tuple<cute::_1, cute::_1, cute::_1>, ClusterShape=ClusterShape]" at line 206 of /home/jizhan/cutlass/include/cutlass/gemm/kernel/sm100_tile_scheduler_stream_k.hpp
instantiation of "dim3 cutlass::gemm::kernel::detail::PersistentTileSchedulerSm100StreamK<TileShape, ClusterShape, Stages_>::get_tiled_cta_shape_mnl(ProblemShape, TileShapeMNK, AtomThrShape, ClusterShape) [with TileShape=cute::tuple<cute::_128, cute::_128, cute::tuple<cute::_64>>, ClusterShape=ClusterShape, Stages_=1U, ProblemShape=ProblemShape, TileShapeMNK=cute::tuple<cute::_128, cute::_128, cute::tuple<cute::_64>>, AtomThrShape=cute::tuple<cute::_1, cute::_1, cute::_1>]" at line 159 of /home/jizhan/cutlass/include/cutlass/gemm/kernel/sm100_tile_scheduler_stream_k.hpp
instantiation of "cutlass::gemm::kernel::detail::PersistentTileSchedulerSm100StreamK<TileShape, ClusterShape, Stages_>::Params cutlass::gemm::kernel::detail::PersistentTileSchedulerSm100StreamK<TileShape, ClusterShape, Stages_>::to_underlying_arguments(ProblemShape, TileShapeMNK, AtomThrShape, ClusterShape, const cutlass::KernelHardwareInfo &, const cutlass::gemm::kernel::detail::PersistentTileSchedulerSm100StreamK<TileShape, ClusterShape, Stages_>::Arguments &, void *, uint32_t) [with TileShape=cute::tuple<cute::_128, cute::_128, cute::tuple<cute::_64>>, ClusterShape=ClusterShape, Stages_=1U, ProblemShape=ProblemShape, TileShapeMNK=cute::tuple<cute::_128, cute::_128, cute::tuple<cute::_64>>, AtomThrShape=cute::tuple<cute::_1, cute::_1, cute::_1>]" at line 262 of /home/jizhan/cutlass/include/cutlass/conv/kernel/sm100_implicit_gemm_tma_warpspecialized.hpp
instantiation of "cutlass::conv::kernel::ConvUniversal<ProblemShape_, CollectiveMainloop_, CollectiveEpilogue_, TileSchedulerTag_, std::enable_if_t<std::is_base_of_v, void>>::Params cutlass::conv::kernel::ConvUniversal<ProblemShape_, CollectiveMainloop_, CollectiveEpilogue_, TileSchedulerTag_, std::enable_if_t<std::is_base_of_v, void>>::to_underlying_arguments(const cutlass::conv::kernel::ConvUniversal<ProblemShape_, CollectiveMainloop_, CollectiveEpilogue_, TileSchedulerTag_, std::enable_if_t<std::is_base_of_v, void>>::Arguments &, void *) [with ProblemShape_=ProblemShape, CollectiveMainloop_=CollectiveMainloop, CollectiveEpilogue_=CollectiveEpilogue, TileSchedulerTag_=cutlass::gemm::StreamKScheduler]" at line 246 of /home/jizhan/cutlass/include/cutlass/conv/device/conv_universal_adapter.hpp
instantiation of "cutlass::Status cutlass::conv::device::ConvUniversalAdapter<ConvKernel_>::initialize(const cutlass::conv::device::ConvUniversalAdapter<ConvKernel_>::Arguments &, void *, cudaStream_t, cutlass::CudaHostAdapter *) [with ConvKernel_=ConvKernel]" at line 446 of /home/jizhan/cutlass/examples/76_blackwell_conv/76_blackwell_conv_fprop.cu
instantiation of "int run<Gemm>(Options &) [with Gemm=Conv]" at line 538 of /home/jizhan/cutlass/examples/76_blackwell_conv/76_blackwell_conv_fprop.cu
Environment details (please complete the following information):
B200, CUTLASS 4.2.1, CUDA 12.8