From fda759355444e7ec8f72bbeecdc0afcdba3b7353 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Fri, 24 Oct 2025 18:21:07 +0200 Subject: [PATCH 1/7] Test-wise benchmark of position velocity example --- benchmark/query_benchmark.mojo | 48 ++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index 1209ca4e..573dc63b 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -35,6 +35,48 @@ fn benchmark_query_1_comp_1_000_000( bencher.iter[bench_fn]() +fn benchmark_vel_pos_add_1_000_000( + mut bencher: Bencher, +) raises capturing: + pos = Position(1.0, 2.0) + vel = Velocity(0.1, 0.2) + + @always_inline + @parameter + fn bench_fn() capturing raises: + world = SmallWorld() + for _ in range(1000): + _ = world.add_entity(pos, vel) + for _ in range(1000): + for entity in world.query[Position](): + ref pos = entity.get[Position]() + ref vel = entity.get[Velocity]() + pos.x += vel.dx + pos.y += vel.dy + + bencher.iter[bench_fn]() + +fn benchmark_vel_pos_add_aos_1_000_000( + mut bencher: Bencher, +) raises capturing: + pos = Position(1.0, 2.0) + vel = Velocity(0.1, 0.2) + + @always_inline + @parameter + fn bench_fn() capturing raises: + l1 = List[Position](length=1000, fill=pos) + l2 = List[Velocity](length=1000, fill=vel) + for _ in range(1000): + for i in range(len(l1)): + ref pos = l1[i] + ref vel = l2[i] + pos.x += vel.dx + pos.y += vel.dy + + bencher.iter[bench_fn]() + + fn benchmark_query_2_comp_1_000_000( mut bencher: Bencher, ) raises capturing: @@ -135,6 +177,12 @@ fn run_all_query_benchmarks() raises: fn run_all_query_benchmarks(mut bench: Bench) raises: + bench.bench_function[benchmark_vel_pos_add_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add") + ) + bench.bench_function[benchmark_vel_pos_add_aos_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add aos") + ) bench.bench_function[benchmark_query_has_1_000_000]( BenchId("10^6 * query has") ) From 17133258d68252021a1ace04cc59495d854e6f54 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Fri, 24 Oct 2025 18:23:06 +0200 Subject: [PATCH 2/7] Apply formatter --- benchmark/query_benchmark.mojo | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index 573dc63b..be0d401a 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -56,6 +56,7 @@ fn benchmark_vel_pos_add_1_000_000( bencher.iter[bench_fn]() + fn benchmark_vel_pos_add_aos_1_000_000( mut bencher: Bencher, ) raises capturing: From 4b3ec882cc28bd6332d60379171176e6424cc596 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Sat, 25 Oct 2025 00:22:23 +0200 Subject: [PATCH 3/7] Add further optimized benchmarks --- benchmark/query_benchmark.mojo | 190 ++++++++++++++++++++++++++++++--- 1 file changed, 174 insertions(+), 16 deletions(-) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index be0d401a..66d5b11e 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -4,7 +4,9 @@ from larecs.world import World from larecs.entity import Entity from larecs.component import ComponentType from larecs.test_utils import * - +from larecs import MutableEntityAccessor +from sys.info import simdwidthof +from algorithm import vectorize fn benchmark_add_entity_1_000_000(mut bencher: Bencher) raises capturing: @always_inline @@ -77,6 +79,156 @@ fn benchmark_vel_pos_add_aos_1_000_000( bencher.iter[bench_fn]() +# fn benchmark_vel_pos_add_aos_vec_1_000_000( +# mut bencher: Bencher, +# ) raises capturing: +# pos2 = Position(1.0, 2.0) +# vel2 = Velocity(0.1, 0.2) +# alias stride = 2 + + +# alias simd_width = simdwidthof[Float64]() + +# @always_inline +# @parameter +# fn bench_fn() capturing raises: +# l1 = List[Position](length=1000, fill=pos2) +# l2 = List[Velocity](length=1000, fill=vel2) + +# @parameter +# fn move[simd_width: Int](i: Int): +# try: +# pos = Pointer(to=l1[i]) +# vel = Pointer(to=l2[i]) +# except: +# return + +# pos_x_ptr = UnsafePointer(to=pos[].x) +# pos_y_ptr = UnsafePointer(to=pos[].y) +# vel_x_ptr = UnsafePointer(to=vel[].dx) +# vel_y_ptr = UnsafePointer(to=vel[].dy) + +# pos_x = pos_x_ptr.strided_load[width=simd_width](stride) +# pos_y = pos_y_ptr.strided_load[width=simd_width](stride) +# vel_x = vel_x_ptr.strided_load[width=simd_width](stride) +# vel_y = vel_y_ptr.strided_load[width=simd_width](stride) + +# pos_x += vel_x +# pos_y += vel_y +# pos_x_ptr.strided_store[width=simd_width](pos_x, stride) +# pos_y_ptr.strided_store[width=simd_width](pos_y, stride) + +# for _ in range(1000): +# vectorize[move, simd_width](len(l1)) + +# bencher.iter[bench_fn]() + +fn benchmark_vel_pos_add_aos_vec_1_000_000( + mut bencher: Bencher, +) raises capturing: + pos2 = Position(1.0, 2.0) + vel2 = Velocity(0.1, 0.2) + alias stride = 2 + + alias simd_width = simdwidthof[Float64]() + + @always_inline + @parameter + fn bench_fn() capturing raises: + l1 = List[Position](length=1000, fill=pos2) + l2 = List[Velocity](length=1000, fill=vel2) + + @parameter + fn move[simd_width: Int](i: Int): + # var pos_ptr = l1.unsafe_ptr().offset(i).bitcast[Float64]() + var pos_ptr = UnsafePointer(to=l1[i]).bitcast[Float64]() + var pos = pos_ptr.load[width=simd_width]() + var vel = l2.unsafe_ptr().offset(i).bitcast[Float64]().load[width=simd_width]() + + pos_ptr.store(pos + vel) + + for _ in range(1000): + vectorize[move, simd_width//2](len(l1)) + + bencher.iter[bench_fn]() + +@fieldwise_init +struct PosX(Copyable, Movable): + var x: Float64 + +@fieldwise_init +struct PosY(Copyable, Movable): + var y: Float64 + +@fieldwise_init +struct VelX(Copyable, Movable): + var dx: Float64 + +@fieldwise_init +struct VelY(Copyable, Movable): + var dy: Float64 + + +fn benchmark_vel_pos_add_vec_1_000_000( + mut bencher: Bencher, +) raises capturing: + + @parameter + fn move[simd_width: Int](entity: MutableEntityAccessor): + try: + var pos_ptr = UnsafePointer(to=entity.get[Position]()).bitcast[Float64]() + var vel = UnsafePointer(to=entity.get[Velocity]()).bitcast[Float64]().load[width=simd_width]() + var pos = pos_ptr.load[width=simd_width]() + pos_ptr.store(pos + vel) + except: + return + + alias simd_width = simdwidthof[Float64]() + + @always_inline + @parameter + fn bench_fn() capturing raises: + world = SmallWorld() + _ = world.add_entities(Position(1.0, 2.0), Velocity(0.1, 0.2), count=1000) + for _ in range(1000): + world.apply[move, simd_width=simd_width](world.query[Position, Velocity]()) + + bencher.iter[bench_fn]() + +# fn benchmark_vel_pos_add_vec_1_000_000( +# mut bencher: Bencher, +# ) raises capturing: + +# @parameter +# fn move[simd_width: Int](entity: MutableEntityAccessor): +# try: +# var posX_ptr = UnsafePointer(to=entity.get[PosX]()).bitcast[Float64]() +# var velX_ptr = UnsafePointer(to=entity.get[VelX]()).bitcast[Float64]() +# var posX = posX_ptr.load[width=simd_width]() +# var velX = velX_ptr.load[width=simd_width]() +# posX_ptr.store(posX + velX) + +# var posY_ptr = UnsafePointer(to=entity.get[PosY]()).bitcast[Float64]() +# var velY_ptr = UnsafePointer(to=entity.get[VelY]()).bitcast[Float64]() +# var posY = posY_ptr.load[width=simd_width]() +# var velY = velY_ptr.load[width=simd_width]() +# posY_ptr.store(posY + velY) + +# except: +# return + +# alias simd_width = simdwidthof[Float64]() + +# @always_inline +# @parameter +# fn bench_fn() capturing raises: +# world = World[PosX, VelX, PosY, VelY]() +# _ = world.add_entities(PosX(1.0), VelX(0.1), PosY(2.0), VelY(0.2), count=1000) +# for _ in range(1000): +# world.apply[move, simd_width=simd_width](world.query[PosX, VelX, PosY, VelY]()) + +# bencher.iter[bench_fn]() + fn benchmark_query_2_comp_1_000_000( mut bencher: Bencher, @@ -178,27 +330,33 @@ fn run_all_query_benchmarks() raises: fn run_all_query_benchmarks(mut bench: Bench) raises: + bench.bench_function[benchmark_vel_pos_add_aos_vec_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add aos vec") + ) + bench.bench_function[benchmark_vel_pos_add_vec_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add vec") + ) bench.bench_function[benchmark_vel_pos_add_1_000_000]( BenchId("10^3 * 10^3 * pos vel add") ) bench.bench_function[benchmark_vel_pos_add_aos_1_000_000]( BenchId("10^3 * 10^3 * pos vel add aos") ) - bench.bench_function[benchmark_query_has_1_000_000]( - BenchId("10^6 * query has") - ) - bench.bench_function[benchmark_query_1_comp_1_000_000]( - BenchId("10^6 * query & get 1 comp") - ) - bench.bench_function[benchmark_query_2_comp_1_000_000]( - BenchId("10^6 * query & get 2 comp") - ) - bench.bench_function[benchmark_query_5_comp_1_000_000]( - BenchId("10^6 * query & get 5 comp") - ) - bench.bench_function[benchmark_query_get_iter_1_000_000]( - BenchId("10^6 * get query iter") - ) + # bench.bench_function[benchmark_query_has_1_000_000]( + # BenchId("10^6 * query has") + # ) + # bench.bench_function[benchmark_query_1_comp_1_000_000]( + # BenchId("10^6 * query & get 1 comp") + # ) + # bench.bench_function[benchmark_query_2_comp_1_000_000]( + # BenchId("10^6 * query & get 2 comp") + # ) + # bench.bench_function[benchmark_query_5_comp_1_000_000]( + # BenchId("10^6 * query & get 5 comp") + # ) + # bench.bench_function[benchmark_query_get_iter_1_000_000]( + # BenchId("10^6 * get query iter") + # ) def main(): From 4a2786c75b7ccdcd2dcaa62d86de50996a526a47 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Sat, 25 Oct 2025 00:24:21 +0200 Subject: [PATCH 4/7] Apply formatter --- benchmark/query_benchmark.mojo | 144 ++++++++++++++++++++------------- 1 file changed, 90 insertions(+), 54 deletions(-) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index 66d5b11e..c58e2578 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -8,6 +8,7 @@ from larecs import MutableEntityAccessor from sys.info import simdwidthof from algorithm import vectorize + fn benchmark_add_entity_1_000_000(mut bencher: Bencher) raises capturing: @always_inline @parameter @@ -79,6 +80,7 @@ fn benchmark_vel_pos_add_aos_1_000_000( bencher.iter[bench_fn]() + # fn benchmark_vel_pos_add_aos_vec_1_000_000( # mut bencher: Bencher, # ) raises capturing: @@ -117,12 +119,13 @@ fn benchmark_vel_pos_add_aos_1_000_000( # pos_y += vel_y # pos_x_ptr.strided_store[width=simd_width](pos_x, stride) # pos_y_ptr.strided_store[width=simd_width](pos_y, stride) - + # for _ in range(1000): # vectorize[move, simd_width](len(l1)) # bencher.iter[bench_fn]() + fn benchmark_vel_pos_add_aos_vec_1_000_000( mut bencher: Bencher, ) raises capturing: @@ -143,41 +146,55 @@ fn benchmark_vel_pos_add_aos_vec_1_000_000( # var pos_ptr = l1.unsafe_ptr().offset(i).bitcast[Float64]() var pos_ptr = UnsafePointer(to=l1[i]).bitcast[Float64]() var pos = pos_ptr.load[width=simd_width]() - var vel = l2.unsafe_ptr().offset(i).bitcast[Float64]().load[width=simd_width]() + var vel = ( + l2.unsafe_ptr() + .offset(i) + .bitcast[Float64]() + .load[width=simd_width]() + ) pos_ptr.store(pos + vel) - + for _ in range(1000): - vectorize[move, simd_width//2](len(l1)) + vectorize[move, simd_width // 2](len(l1)) bencher.iter[bench_fn]() + @fieldwise_init struct PosX(Copyable, Movable): var x: Float64 + @fieldwise_init struct PosY(Copyable, Movable): var y: Float64 + @fieldwise_init struct VelX(Copyable, Movable): var dx: Float64 + @fieldwise_init struct VelY(Copyable, Movable): var dy: Float64 -fn benchmark_vel_pos_add_vec_1_000_000( +fn benchmark_vel_pos_add_vec_optimized_1_000_000( mut bencher: Bencher, ) raises capturing: - @parameter fn move[simd_width: Int](entity: MutableEntityAccessor): try: - var pos_ptr = UnsafePointer(to=entity.get[Position]()).bitcast[Float64]() - var vel = UnsafePointer(to=entity.get[Velocity]()).bitcast[Float64]().load[width=simd_width]() + var pos_ptr = UnsafePointer(to=entity.get[Position]()).bitcast[ + Float64 + ]() + var vel = ( + UnsafePointer(to=entity.get[Velocity]()) + .bitcast[Float64]() + .load[width=simd_width]() + ) var pos = pos_ptr.load[width=simd_width]() pos_ptr.store(pos + vel) except: @@ -189,45 +206,61 @@ fn benchmark_vel_pos_add_vec_1_000_000( @parameter fn bench_fn() capturing raises: world = SmallWorld() - _ = world.add_entities(Position(1.0, 2.0), Velocity(0.1, 0.2), count=1000) + _ = world.add_entities( + Position(1.0, 2.0), Velocity(0.1, 0.2), count=1000 + ) for _ in range(1000): - world.apply[move, simd_width=simd_width](world.query[Position, Velocity]()) + world.apply[move, simd_width=simd_width]( + world.query[Position, Velocity]() + ) bencher.iter[bench_fn]() -# fn benchmark_vel_pos_add_vec_1_000_000( -# mut bencher: Bencher, -# ) raises capturing: -# @parameter -# fn move[simd_width: Int](entity: MutableEntityAccessor): -# try: -# var posX_ptr = UnsafePointer(to=entity.get[PosX]()).bitcast[Float64]() -# var velX_ptr = UnsafePointer(to=entity.get[VelX]()).bitcast[Float64]() -# var posX = posX_ptr.load[width=simd_width]() -# var velX = velX_ptr.load[width=simd_width]() -# posX_ptr.store(posX + velX) - -# var posY_ptr = UnsafePointer(to=entity.get[PosY]()).bitcast[Float64]() -# var velY_ptr = UnsafePointer(to=entity.get[VelY]()).bitcast[Float64]() -# var posY = posY_ptr.load[width=simd_width]() -# var velY = velY_ptr.load[width=simd_width]() -# posY_ptr.store(posY + velY) - -# except: -# return +fn benchmark_vel_pos_add_vec_1_000_000( + mut bencher: Bencher, +) raises capturing: + @parameter + fn move[simd_width: Int](entity: MutableEntityAccessor): + try: + var posX_ptr = UnsafePointer(to=entity.get[PosX]()).bitcast[ + Float64 + ]() + var velX_ptr = UnsafePointer(to=entity.get[VelX]()).bitcast[ + Float64 + ]() + var posX = posX_ptr.load[width=simd_width]() + var velX = velX_ptr.load[width=simd_width]() + posX_ptr.store(posX + velX) + + var posY_ptr = UnsafePointer(to=entity.get[PosY]()).bitcast[ + Float64 + ]() + var velY_ptr = UnsafePointer(to=entity.get[VelY]()).bitcast[ + Float64 + ]() + var posY = posY_ptr.load[width=simd_width]() + var velY = velY_ptr.load[width=simd_width]() + posY_ptr.store(posY + velY) -# alias simd_width = simdwidthof[Float64]() + except: + return -# @always_inline -# @parameter -# fn bench_fn() capturing raises: -# world = World[PosX, VelX, PosY, VelY]() -# _ = world.add_entities(PosX(1.0), VelX(0.1), PosY(2.0), VelY(0.2), count=1000) -# for _ in range(1000): -# world.apply[move, simd_width=simd_width](world.query[PosX, VelX, PosY, VelY]()) + alias simd_width = simdwidthof[Float64]() -# bencher.iter[bench_fn]() + @always_inline + @parameter + fn bench_fn() capturing raises: + world = World[PosX, VelX, PosY, VelY]() + _ = world.add_entities( + PosX(1.0), VelX(0.1), PosY(2.0), VelY(0.2), count=1000 + ) + for _ in range(1000): + world.apply[move, simd_width=simd_width]( + world.query[PosX, VelX, PosY, VelY]() + ) + + bencher.iter[bench_fn]() fn benchmark_query_2_comp_1_000_000( @@ -333,6 +366,9 @@ fn run_all_query_benchmarks(mut bench: Bench) raises: bench.bench_function[benchmark_vel_pos_add_aos_vec_1_000_000]( BenchId("10^3 * 10^3 * pos vel add aos vec") ) + bench.bench_function[benchmark_vel_pos_add_vec_optimized_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add aos vec optimized") + ) bench.bench_function[benchmark_vel_pos_add_vec_1_000_000]( BenchId("10^3 * 10^3 * pos vel add vec") ) @@ -342,21 +378,21 @@ fn run_all_query_benchmarks(mut bench: Bench) raises: bench.bench_function[benchmark_vel_pos_add_aos_1_000_000]( BenchId("10^3 * 10^3 * pos vel add aos") ) - # bench.bench_function[benchmark_query_has_1_000_000]( - # BenchId("10^6 * query has") - # ) - # bench.bench_function[benchmark_query_1_comp_1_000_000]( - # BenchId("10^6 * query & get 1 comp") - # ) - # bench.bench_function[benchmark_query_2_comp_1_000_000]( - # BenchId("10^6 * query & get 2 comp") - # ) - # bench.bench_function[benchmark_query_5_comp_1_000_000]( - # BenchId("10^6 * query & get 5 comp") - # ) - # bench.bench_function[benchmark_query_get_iter_1_000_000]( - # BenchId("10^6 * get query iter") - # ) + bench.bench_function[benchmark_query_has_1_000_000]( + BenchId("10^6 * query has") + ) + bench.bench_function[benchmark_query_1_comp_1_000_000]( + BenchId("10^6 * query & get 1 comp") + ) + bench.bench_function[benchmark_query_2_comp_1_000_000]( + BenchId("10^6 * query & get 2 comp") + ) + bench.bench_function[benchmark_query_5_comp_1_000_000]( + BenchId("10^6 * query & get 5 comp") + ) + bench.bench_function[benchmark_query_get_iter_1_000_000]( + BenchId("10^6 * get query iter") + ) def main(): From 881dfb5966f2f48a3bf75e09652bdbcd47e5c632 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Sat, 25 Oct 2025 12:45:49 +0200 Subject: [PATCH 5/7] Fix wrong simd width, clean up, reorder --- benchmark/query_benchmark.mojo | 54 +++++++++++++++------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index c58e2578..e5c04f1f 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -210,7 +210,7 @@ fn benchmark_vel_pos_add_vec_optimized_1_000_000( Position(1.0, 2.0), Velocity(0.1, 0.2), count=1000 ) for _ in range(1000): - world.apply[move, simd_width=simd_width]( + world.apply[move, simd_width = simd_width // 2]( world.query[Position, Velocity]() ) @@ -223,24 +223,18 @@ fn benchmark_vel_pos_add_vec_1_000_000( @parameter fn move[simd_width: Int](entity: MutableEntityAccessor): try: - var posX_ptr = UnsafePointer(to=entity.get[PosX]()).bitcast[ - Float64 - ]() - var velX_ptr = UnsafePointer(to=entity.get[VelX]()).bitcast[ - Float64 - ]() + var posX_ptr = UnsafePointer(to=entity.get[PosX]().x) var posX = posX_ptr.load[width=simd_width]() - var velX = velX_ptr.load[width=simd_width]() + var velX = UnsafePointer(to=entity.get[VelX]().dx).load[ + width=simd_width + ]() posX_ptr.store(posX + velX) - var posY_ptr = UnsafePointer(to=entity.get[PosY]()).bitcast[ - Float64 - ]() - var velY_ptr = UnsafePointer(to=entity.get[VelY]()).bitcast[ - Float64 - ]() + var posY_ptr = UnsafePointer(to=entity.get[PosY]().y) var posY = posY_ptr.load[width=simd_width]() - var velY = velY_ptr.load[width=simd_width]() + var velY = UnsafePointer(to=entity.get[VelY]().dy).load[ + width=simd_width + ]() posY_ptr.store(posY + velY) except: @@ -363,21 +357,6 @@ fn run_all_query_benchmarks() raises: fn run_all_query_benchmarks(mut bench: Bench) raises: - bench.bench_function[benchmark_vel_pos_add_aos_vec_1_000_000]( - BenchId("10^3 * 10^3 * pos vel add aos vec") - ) - bench.bench_function[benchmark_vel_pos_add_vec_optimized_1_000_000]( - BenchId("10^3 * 10^3 * pos vel add aos vec optimized") - ) - bench.bench_function[benchmark_vel_pos_add_vec_1_000_000]( - BenchId("10^3 * 10^3 * pos vel add vec") - ) - bench.bench_function[benchmark_vel_pos_add_1_000_000]( - BenchId("10^3 * 10^3 * pos vel add") - ) - bench.bench_function[benchmark_vel_pos_add_aos_1_000_000]( - BenchId("10^3 * 10^3 * pos vel add aos") - ) bench.bench_function[benchmark_query_has_1_000_000]( BenchId("10^6 * query has") ) @@ -393,6 +372,21 @@ fn run_all_query_benchmarks(mut bench: Bench) raises: bench.bench_function[benchmark_query_get_iter_1_000_000]( BenchId("10^6 * get query iter") ) + bench.bench_function[benchmark_vel_pos_add_aos_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add aos") + ) + bench.bench_function[benchmark_vel_pos_add_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add") + ) + bench.bench_function[benchmark_vel_pos_add_aos_vec_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add aos vec optimized") + ) + bench.bench_function[benchmark_vel_pos_add_vec_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add vec") + ) + bench.bench_function[benchmark_vel_pos_add_vec_optimized_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add vec optimized") + ) def main(): From 2001c243aaccb4faf97d5768efdf9b6916106ed1 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Sat, 25 Oct 2025 13:02:31 +0200 Subject: [PATCH 6/7] Further fix --- benchmark/query_benchmark.mojo | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index e5c04f1f..81549453 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -23,14 +23,11 @@ fn benchmark_add_entity_1_000_000(mut bencher: Bencher) raises capturing: fn benchmark_query_1_comp_1_000_000( mut bencher: Bencher, ) raises capturing: - pos = Position(1.0, 2.0) - @always_inline @parameter fn bench_fn() capturing raises: world = SmallWorld() - for _ in range(1000): - _ = world.add_entity(pos) + _ = world.add_entities(Position(1.0, 2.0), count=1000) for _ in range(1000): for entity in world.query[Position](): keep(entity.get[Position]().x) @@ -41,15 +38,13 @@ fn benchmark_query_1_comp_1_000_000( fn benchmark_vel_pos_add_1_000_000( mut bencher: Bencher, ) raises capturing: - pos = Position(1.0, 2.0) - vel = Velocity(0.1, 0.2) - @always_inline @parameter fn bench_fn() capturing raises: world = SmallWorld() - for _ in range(1000): - _ = world.add_entity(pos, vel) + _ = world.add_entities( + Position(1.0, 2.0), Velocity(0.1, 0.2), count=1000 + ) for _ in range(1000): for entity in world.query[Position](): ref pos = entity.get[Position]() @@ -145,12 +140,12 @@ fn benchmark_vel_pos_add_aos_vec_1_000_000( fn move[simd_width: Int](i: Int): # var pos_ptr = l1.unsafe_ptr().offset(i).bitcast[Float64]() var pos_ptr = UnsafePointer(to=l1[i]).bitcast[Float64]() - var pos = pos_ptr.load[width=simd_width]() + var pos = pos_ptr.load[width = simd_width * 2]() var vel = ( l2.unsafe_ptr() .offset(i) .bitcast[Float64]() - .load[width=simd_width]() + .load[width = simd_width * 2]() ) pos_ptr.store(pos + vel) @@ -193,9 +188,9 @@ fn benchmark_vel_pos_add_vec_optimized_1_000_000( var vel = ( UnsafePointer(to=entity.get[Velocity]()) .bitcast[Float64]() - .load[width=simd_width]() + .load[width = simd_width * 2]() ) - var pos = pos_ptr.load[width=simd_width]() + var pos = pos_ptr.load[width = simd_width * 2]() pos_ptr.store(pos + vel) except: return From 37a297983af09c4522766f25282ff29d2bfd24b9 Mon Sep 17 00:00:00 2001 From: Samuel Fischer Date: Sat, 25 Oct 2025 13:55:44 +0200 Subject: [PATCH 7/7] Add a split pos vel implementation --- benchmark/query_benchmark.mojo | 74 ++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/benchmark/query_benchmark.mojo b/benchmark/query_benchmark.mojo index 81549453..d06e735f 100644 --- a/benchmark/query_benchmark.mojo +++ b/benchmark/query_benchmark.mojo @@ -157,23 +157,23 @@ fn benchmark_vel_pos_add_aos_vec_1_000_000( @fieldwise_init -struct PosX(Copyable, Movable): - var x: Float64 +struct PosX(Copyable & Movable): + var value: Float64 @fieldwise_init -struct PosY(Copyable, Movable): - var y: Float64 +struct PosY(Copyable & Movable): + var value: Float64 @fieldwise_init -struct VelX(Copyable, Movable): - var dx: Float64 +struct VelX(Copyable & Movable): + var value: Float64 @fieldwise_init -struct VelY(Copyable, Movable): - var dy: Float64 +struct VelY(Copyable & Movable): + var value: Float64 fn benchmark_vel_pos_add_vec_optimized_1_000_000( @@ -218,16 +218,16 @@ fn benchmark_vel_pos_add_vec_1_000_000( @parameter fn move[simd_width: Int](entity: MutableEntityAccessor): try: - var posX_ptr = UnsafePointer(to=entity.get[PosX]().x) + var posX_ptr = UnsafePointer(to=entity.get[PosX]().value) var posX = posX_ptr.load[width=simd_width]() - var velX = UnsafePointer(to=entity.get[VelX]().dx).load[ + var velX = UnsafePointer(to=entity.get[VelX]().value).load[ width=simd_width ]() posX_ptr.store(posX + velX) - var posY_ptr = UnsafePointer(to=entity.get[PosY]().y) + var posY_ptr = UnsafePointer(to=entity.get[PosY]().value) var posY = posY_ptr.load[width=simd_width]() - var velY = UnsafePointer(to=entity.get[VelY]().dy).load[ + var velY = UnsafePointer(to=entity.get[VelY]().value).load[ width=simd_width ]() posY_ptr.store(posY + velY) @@ -252,6 +252,53 @@ fn benchmark_vel_pos_add_vec_1_000_000( bencher.iter[bench_fn]() +fn benchmark_vel_pos_add_vec_split_1_000_000( + mut bencher: Bencher, +) raises capturing: + @parameter + fn move_x[simd_width: Int](entity: MutableEntityAccessor): + try: + var pos_ptr = UnsafePointer(to=entity.get[PosX]().value) + var pos = pos_ptr.load[width=simd_width]() + var vel = UnsafePointer(to=entity.get[VelX]().value).load[ + width=simd_width + ]() + pos_ptr.store(pos + vel) + except: + return + + @parameter + fn move_y[simd_width: Int](entity: MutableEntityAccessor): + try: + var pos_ptr = UnsafePointer(to=entity.get[PosY]().value) + var pos = pos_ptr.load[width=simd_width]() + var vel = UnsafePointer(to=entity.get[VelY]().value).load[ + width=simd_width + ]() + pos_ptr.store(pos + vel) + except: + return + + alias simd_width = simdwidthof[Float64]() + + @always_inline + @parameter + fn bench_fn() capturing raises: + world = World[PosX, VelX, PosY, VelY]() + _ = world.add_entities( + PosX(1.0), VelX(0.1), PosY(2.0), VelY(0.2), count=1000 + ) + for _ in range(1000): + world.apply[move_x, simd_width=simd_width]( + world.query[PosX, VelX, PosY, VelY]() + ) + world.apply[move_y, simd_width=simd_width]( + world.query[PosX, VelX, PosY, VelY]() + ) + + bencher.iter[bench_fn]() + + fn benchmark_query_2_comp_1_000_000( mut bencher: Bencher, ) raises capturing: @@ -379,6 +426,9 @@ fn run_all_query_benchmarks(mut bench: Bench) raises: bench.bench_function[benchmark_vel_pos_add_vec_1_000_000]( BenchId("10^3 * 10^3 * pos vel add vec") ) + bench.bench_function[benchmark_vel_pos_add_vec_split_1_000_000]( + BenchId("10^3 * 10^3 * pos vel add vec split") + ) bench.bench_function[benchmark_vel_pos_add_vec_optimized_1_000_000]( BenchId("10^3 * 10^3 * pos vel add vec optimized") )