@@ -16,73 +16,87 @@ import
1616 ../ laser/ dynamic_stack_arrays,
1717 ../ laser/ tensor/ datatypes,
1818 nimblas,
19- nimcuda/ cuda12_5/ [cuda_runtime_api, check],
2019 # Standard library
2120 std/ [complex]
2221
2322export nimblas.OrderType , complex
2423export datatypes, dynamic_stack_arrays
2524
26- type
27- CudaTensorRefTrackerObj * [T: SomeFloat ] = object
28- value* : ptr UncheckedArray [T]
29-
30- CudaTensorRefTracker * [T] = ref CudaTensorRefTrackerObj [T]
31-
32- CudaStorage * [T: SomeFloat ] = object
33- # # Opaque seq-like structure for storage on the Cuda backend.
34- # #
35- # # Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused.
36- # #
37- # TODO : Forward declaring this and making this completely private prevent assignment in newCudaStorage from working
38- Flen * : int
39- Fdata * : ptr UncheckedArray [T]
40- Fref_tracking* : CudaTensorRefTracker [T] # We keep ref tracking for the GC in a separate field to avoid double indirection.
41-
42- CudaTensor * [T: SomeFloat ] = object
43- # # Tensor data structure stored on Nvidia GPU (Cuda)
44- # # - ``shape``: Dimensions of the CudaTensor
45- # # - ``strides``: Numbers of items to skip to get the next item along a dimension.
46- # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
47- # # - ``storage``: An opaque data storage for the CudaTensor
48- # #
49- # # Warning ⚠:
50- # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
51- # # However modification on metadata (shape, strides or offset) will not affect the other tensor.
52- # # Explicit copies can be made with ``clone``: ``var a = b.clone``
53- shape* : Metadata
54- strides* : Metadata
55- offset* : int
56- storage* : CudaStorage [T]
57-
58- ClStorage * [T: SomeFloat ] = object
59- # # Opaque seq-like structure for storage on the OpenCL backend.
60- Flen * : int
61- Fdata * : ptr UncheckedArray [T]
62- Fref_tracking* : ref [ptr UncheckedArray [T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.
63-
64- ClTensor * [T: SomeFloat ] = object
65- # # Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
66- # # - ``shape``: Dimensions of the CudaTensor
67- # # - ``strides``: Numbers of items to skip to get the next item along a dimension.
68- # # - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices.
69- # # - ``storage``: An opaque data storage for the CudaTensor
70- # #
71- # # Warning ⚠:
72- # # Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
73- # # However modification on metadata (shape, strides or offset) will not affect the other tensor.
74- # # Explicit copies can be made with ``clone``: ``var a = b.clone``
75- shape* : Metadata
76- strides* : Metadata
77- offset* : int
78- storage* : ClStorage [T]
79-
80- AnyTensor * [T] = Tensor [T] or CudaTensor [T] or ClTensor [T]
81-
82-
83- proc deallocCuda * [T](p: CudaTensorRefTracker [T]) {.noSideEffect .}=
84- if not p.value.isNil:
85- check cudaFree (p.value)
when defined(cuda):
  import nimcuda/cuda12_5/[cuda_runtime_api, check]

  type
    CudaTensorRefTrackerObj*[T: SomeFloat] = object
      ## Heap cell owning the raw device pointer, so the Nim GC can observe
      ## when the GPU buffer becomes unreachable and trigger its finalizer.
      value*: ptr UncheckedArray[T]

    CudaTensorRefTracker*[T] = ref CudaTensorRefTrackerObj[T]

    CudaStorage*[T: SomeFloat] = object
      ## Opaque seq-like structure for storage on the Cuda backend.
      ##
      ## Nim garbage collector will automatically ask cuda to clear GPU memory
      ## if data becomes unused.
      # TODO: Forward declaring this and making this completely private
      # prevents assignment in newCudaStorage from working.
      Flen*: int                                  # Number of elements allocated on device
      Fdata*: ptr UncheckedArray[T]               # Raw device pointer (no offset applied)
      # Ref tracking for the GC lives in a separate field to avoid double indirection.
      Fref_tracking*: CudaTensorRefTracker[T]

    CudaTensor*[T: SomeFloat] = object
      ## Tensor data structure stored on Nvidia GPU (Cuda)
      ##   - ``shape``: Dimensions of the CudaTensor
      ##   - ``strides``: Numbers of items to skip to get the next item along a dimension.
      ##   - ``offset``: Offset to get the first item of the CudaTensor.
      ##     Note: offset can be negative, in particular for slices.
      ##   - ``storage``: An opaque data storage for the CudaTensor
      ##
      ## Warning ⚠:
      ##   Assignment ``var a = b`` does not copy the data. Data modification on one
      ##   CudaTensor will be reflected on the other.
      ##   However modification on metadata (shape, strides or offset) will not
      ##   affect the other tensor.
      ##   Explicit copies can be made with ``clone``: ``var a = b.clone``
      shape*: Metadata
      strides*: Metadata
      offset*: int
      storage*: CudaStorage[T]

  proc deallocCuda*[T](p: CudaTensorRefTracker[T]) {.noSideEffect.} =
    ## Finalizer for the ref-tracking cell: releases the tracked GPU buffer
    ## through ``cudaFree`` (checked), skipping already-nil pointers.
    if not p.value.isNil:
      check cudaFree(p.value)
63+
when defined(opencl):
  type
    ClStorage*[T: SomeFloat] = object
      ## Opaque seq-like structure for storage on the OpenCL backend.
      Flen*: int                                  # Number of elements allocated on device
      Fdata*: ptr UncheckedArray[T]               # Raw device pointer (no offset applied)
      # Ref tracking for the GC lives in a separate field to avoid double indirection.
      Fref_tracking*: ref[ptr UncheckedArray[T]]

    ClTensor*[T: SomeFloat] = object
      ## Tensor data structure stored on OpenCL (CPU, GPU, FPGAs or other accelerators)
      ##   - ``shape``: Dimensions of the ClTensor
      ##   - ``strides``: Numbers of items to skip to get the next item along a dimension.
      ##   - ``offset``: Offset to get the first item of the ClTensor.
      ##     Note: offset can be negative, in particular for slices.
      ##   - ``storage``: An opaque data storage for the ClTensor
      ##
      ## Warning ⚠:
      ##   Assignment ``var a = b`` does not copy the data. Data modification on one
      ##   ClTensor will be reflected on the other.
      ##   However modification on metadata (shape, strides or offset) will not
      ##   affect the other tensor.
      ##   Explicit copies can be made with ``clone``: ``var a = b.clone``
      shape*: Metadata
      strides*: Metadata
      offset*: int
      storage*: ClStorage[T]
87+
# ``AnyTensor`` is the typeclass of every tensor flavour enabled at compile
# time; its exact shape depends on which GPU backends were compiled in.
when defined(cuda) and defined(opencl):
  type AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T]
elif defined(cuda):
  type AnyTensor*[T] = Tensor[T] or CudaTensor[T]
elif defined(opencl):
  type AnyTensor*[T] = Tensor[T] or ClTensor[T]
else:
  type AnyTensor*[T] = Tensor[T]

# Device-resident tensors only: any enabled tensor kind except the CPU Tensor.
# Intentionally unexported; used internally to constrain GPU-only procs.
type GpuTensor[T] = AnyTensor[T] and not Tensor[T]
98+
99+
86100
87101
88102# ###############
@@ -102,10 +116,10 @@ proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw ins
102116# Tensor Metadata
103117# ################
104118
func rank*[T](t: GpuTensor[T]): range[0 .. LASER_MAXRANK] {.inline.} =
  ## Number of dimensions of a GPU-resident tensor
  ## (0 for a scalar, 1 for a vector, 2 for a matrix, …).
  t.shape.len
107121
func size*[T](t: GpuTensor[T]): Natural {.inline.} =
  ## Total number of elements in a GPU-resident tensor,
  ## i.e. the product of its shape.
  t.shape.product
110124
111125proc shape_to_strides * (shape: Metadata , layout: OrderType = rowMajor, result: var Metadata ) {.noSideEffect .} =
@@ -131,7 +145,7 @@ proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result: va
131145 accum *= shape[i]
132146 return
133147
134- func is_C_contiguous * (t: CudaTensor or ClTensor ): bool =
148+ func is_C_contiguous * (t: GpuTensor ): bool =
135149 # # Check if the tensor follows C convention / is row major
136150 var cur_size = 1
137151 for i in countdown (t.rank - 1 ,0 ):
@@ -182,14 +196,14 @@ proc get_offset_ptr*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr T {.noSideEffec
182196proc get_offset_ptr * [T: not KnownSupportsCopyMem ](t: AnyTensor [T]): ptr T {.error : " `get_offset_ptr`" &
183197 " cannot be safely used for GC'ed types!" .}
184198
proc get_data_ptr*[T](t: GpuTensor[T]): ptr T {.noSideEffect, inline.} =
  ## Input:
  ##   - A GPU-resident tensor
  ## Returns:
  ##   - A pointer to the real start of its data (no offset applied)
  cast[ptr T](t.storage.Fdata)
191205
192- proc get_offset_ptr * [T](t: CudaTensor [T] or ClTensor [T]): ptr T {.noSideEffect , inline .}=
206+ proc get_offset_ptr * [T](t: GpuTensor [T]): ptr T {.noSideEffect , inline .}=
193207 # # Input:
194208 # # - A tensor
195209 # # Returns:
0 commit comments