3 files changed: +12 additions, -9 deletions.
@@ -239,8 +239,9 @@ Handle cases where the last tile might be smaller than `tile_size`.
239239### 2. **Vectorized function pattern**
240240
241241``` mojo
242- @parameter
243- fn vectorized_add[width: Int](i: Int):
242+ fn vectorized_add[
243+ width: Int
244+ ](i: Int) unified {read tile_start, read a, read b, mut output}:
244245 global_idx = tile_start + i
245246 if global_idx + width <= size: # Bounds checking
246247 # SIMD operations here
@@ -251,7 +252,7 @@ The `width` parameter is automatically determined by the vectorize function.
251252### 3. **Calling vectorize**
252253
253254``` mojo
254- vectorize[vectorized_add, simd_width](actual_tile_size)
255+ vectorize[simd_width](actual_tile_size, vectorized_add )
255256```
256257
257258This automatically handles the vectorization loop with the provided SIMD width.
@@ -337,8 +338,9 @@ actual_tile_size = tile_end - tile_start
337338**Automatic vectorization mechanism:**
338339
339340``` mojo
340- @parameter
341- fn vectorized_add[width: Int](i: Int):
341+ fn vectorized_add[
342+ width: Int
343+ ](i: Int) unified {read tile_start, read a, read b, mut output}:
342344 global_idx = tile_start + i
343345 if global_idx + width <= size:
344346 # Automatic SIMD optimization
Original file line number Diff line number Diff line change @@ -31,7 +31,7 @@ system-requirements = { macos = "15.0" }
3131[dependencies]
3232python = "==3.12"
3333mojo = "<1.0.0" # includes `mojo-compiler`, lsp, debugger, formatter etc.
34- max = "==25.7.0.dev2025111305"
34+ max = "==26.1.0.dev2025112105"
3535bash = ">=5.2.21,<6"
3636manim = ">=0.18.1,<0.19"
3737mdbook = ">=0.4.48,<0.5"
Original file line number Diff line number Diff line change @@ -166,8 +166,9 @@ fn vectorize_within_tiles_elementwise_add[
166166 tile_end = min (tile_start + tile_size, size)
167167 actual_tile_size = tile_end - tile_start
168168
169- @parameter
170- fn vectorized_add [width : Int](i : Int):
169+ fn vectorized_add [
170+ width : Int
171+ ](i : Int) unified {read tile_start , read a , read b , mut output }:
171172 global_idx = tile_start + i
172173 if global_idx + width <= size:
173174 a_vec = a.aligned_load[width](global_idx, 0 )
@@ -176,7 +177,7 @@ fn vectorize_within_tiles_elementwise_add[
176177 output.aligned_store[width](global_idx, 0 , result)
177178
178179 # Use vectorize within each tile
179- vectorize[vectorized_add, simd_width](actual_tile_size)
180+ vectorize[simd_width](actual_tile_size, vectorized_add )
180181
181182 num_tiles = (size + tile_size - 1 ) // tile_size
182183 elementwise[
You can’t perform that action at this time.
0 commit comments