@@ -31,8 +31,8 @@ template cuda_assign_binding(kernel_name: string, binding_name: untyped)=
3131 proc `binding_name` [T: SomeFloat ](
3232 blocksPerGrid, threadsPerBlock: cint ,
3333 rank, len: cint ,
34- dst_shape, dst_strides: ptr cint , dst_offset: cint , dst_data: ptr T,
35- src_shape, src_strides: ptr cint , src_offset: cint , src_data: ptr T
34+ dst_shape, dst_strides: ptr UncheckedArray [ cint ] , dst_offset: cint , dst_data: ptr T,
35+ src_shape, src_strides: ptr UncheckedArray [ cint ] , src_offset: cint , src_data: ptr T
3636 ) {.importcpp : import_string, noSideEffect .}
3737
3838
@@ -86,9 +86,9 @@ template cuda_assign_call*[T: SomeFloat](
8686 kernel_name [T](
8787 CUDA_HOF_TPB , CUDA_HOF_BPG ,
8888 src.rank, dst.len, # Note: small shortcut, in this case len and size are the same
89- dst.shape[] , dst.strides[] ,
89+ dst.shape.value , dst.strides.value ,
9090 dst.offset, dst.data,
91- src.shape[] , src.strides[] ,
91+ src.shape.value , src.strides.value ,
9292 src.offset, src.data
9393 )
9494
@@ -106,9 +106,9 @@ template cuda_binary_binding(kernel_name: string, binding_name: untyped)=
106106 proc `binding_name` [T: SomeFloat ](
107107 blocksPerGrid, threadsPerBlock: cint ,
108108 rank, len: cint ,
109- dst_shape, dst_strides: ptr cint , dst_offset: cint , dst_data: ptr T,
110- a_shape, a_strides: ptr cint , a_offset: cint , a_data: ptr T,
111- b_shape, b_strides: ptr cint , b_offset: cint , b_data: ptr T
109+ dst_shape, dst_strides: ptr UncheckedArray [ cint ] , dst_offset: cint , dst_data: ptr T,
110+ a_shape, a_strides: ptr UncheckedArray [ cint ] , a_offset: cint , a_data: ptr T,
111+ b_shape, b_strides: ptr UncheckedArray [ cint ] , b_offset: cint , b_data: ptr T
112112 ) {.importcpp : import_string, noSideEffect .}
113113
114114
@@ -170,11 +170,11 @@ template cuda_binary_call*[T: SomeFloat](
170170 kernel_name (
171171 CUDA_HOF_TPB , CUDA_HOF_BPG ,
172172 src_a.rank, dst.len, # Note: small shortcut, in this case len and size are the same
173- dst.shape[] , dst.strides[] ,
173+ dst.shape.value , dst.strides.value ,
174174 dst.offset, dst.data,
175- src_a.shape[] , src_a.strides[] ,
175+ src_a.shape.value , src_a.strides.value ,
176176 src_a.offset, src_a.data,
177- src_b.shape[] , src_b.strides[] ,
177+ src_b.shape.value , src_b.strides.value ,
178178 src_b.offset, src_b.data
179179 )
180180
@@ -193,8 +193,8 @@ template cuda_rscal_binding(kernel_name: string, binding_name: untyped)=
193193 proc `binding_name` [T: SomeFloat ](
194194 blocksPerGrid, threadsPerBlock: cint ,
195195 rank, len: cint ,
196- dst_shape, dst_strides: ptr cint , dst_offset: cint , dst_data: ptr T,
197- src_shape, src_strides: ptr cint , src_offset: cint , src_data: ptr T,
196+ dst_shape, dst_strides: ptr UncheckedArray [ cint ] , dst_offset: cint , dst_data: ptr T,
197+ src_shape, src_strides: ptr UncheckedArray [ cint ] , src_offset: cint , src_data: ptr T,
198198 beta: T
199199 ) {.importcpp : import_string, noSideEffect .}
200200
@@ -252,9 +252,9 @@ template cuda_rscal_call*[T: SomeFloat](
252252 kernel_name [T](
253253 CUDA_HOF_TPB , CUDA_HOF_BPG ,
254254 src.rank, dst.len, # Note: small shortcut, in this case len and size are the same
255- dst.shape[] , dst.strides[] ,
255+ dst.shape.value , dst.strides.value ,
256256 dst.offset, dst.data,
257- src.shape[] , src.strides[] ,
257+ src.shape.value , src.strides.value ,
258258 src.offset, src.data,
259259 beta
260260 )
@@ -274,9 +274,9 @@ template cuda_lscal_binding(kernel_name: string, binding_name: untyped)=
274274 proc `binding_name` [T: SomeFloat ](
275275 blocksPerGrid, threadsPerBlock: cint ,
276276 rank, len: cint ,
277- dst_shape, dst_strides: ptr cint , dst_offset: cint , dst_data: ptr T,
277+ dst_shape, dst_strides: ptr UncheckedArray [ cint ] , dst_offset: cint , dst_data: ptr T,
278278 alpha: T,
279- src_shape, src_strides: ptr cint , src_offset: cint , src_data: ptr T,
279+ src_shape, src_strides: ptr UncheckedArray [ cint ] , src_offset: cint , src_data: ptr T,
280280 ) {.importcpp : import_string, noSideEffect .}
281281
282282
@@ -332,10 +332,10 @@ template cuda_lscal_call*[T: SomeFloat](
332332 kernel_name [T](
333333 CUDA_HOF_TPB , CUDA_HOF_BPG ,
334334 src.rank, dst.len, # Note: small shortcut, in this case len and size are the same
335- dst.shape[] , dst.strides[] ,
335+ dst.shape.value , dst.strides.value ,
336336 dst.offset, dst.data,
337337 alpha,
338- src.shape[] , src.strides[] ,
338+ src.shape.value , src.strides.value ,
339339 src.offset, src.data
340340 )
341341
@@ -352,7 +352,7 @@ template cuda_assignscal_binding(kernel_name: string, binding_name: untyped)=
352352 proc `binding_name` [T: SomeFloat ](
353353 blocksPerGrid, threadsPerBlock: cint ,
354354 rank, len: cint ,
355- dst_shape, dst_strides: ptr cint , dst_offset: cint , dst_data: ptr T,
355+ dst_shape, dst_strides: ptr UncheckedArray [ cint ] , dst_offset: cint , dst_data: ptr T,
356356 scalar: T
357357 ) {.importcpp : import_string, noSideEffect .}
358358
@@ -402,7 +402,7 @@ template cuda_assignscal_call*[T: SomeFloat](
402402 kernel_name [T](
403403 CUDA_HOF_TPB , CUDA_HOF_BPG ,
404404 dst.rank, dst.len, # Note: small shortcut, in this case len and size are the same
405- dst.shape[] , dst.strides[] ,
405+ dst.shape.value , dst.strides.value ,
406406 dst.offset, dst.data,
407407 val
408408 )
0 commit comments