1+ use wgpu:: util:: DeviceExt ;
2+
13use super :: { shader_module, Options } ;
2- use core:: num:: NonZeroU64 ;
3-
4- fn create_device_queue ( ) -> ( wgpu:: Device , wgpu:: Queue ) {
5- async fn create_device_queue_async ( ) -> ( wgpu:: Device , wgpu:: Queue ) {
6- let instance = wgpu:: Instance :: new ( wgpu:: BackendBit :: PRIMARY ) ;
7- let adapter = instance
8- . request_adapter ( & wgpu:: RequestAdapterOptions {
9- power_preference : wgpu:: PowerPreference :: default ( ) ,
10- compatible_surface : None ,
11- } )
12- . await
13- . expect ( "Failed to find an appropriate adapter" ) ;
14-
15- adapter
16- . request_device (
17- & wgpu:: DeviceDescriptor {
18- label : None ,
19- features : wgpu:: Features :: empty ( ) ,
20- limits : wgpu:: Limits :: default ( ) ,
21- } ,
22- None ,
23- )
24- . await
25- . expect ( "Failed to create device" )
26- }
4+ use futures:: future:: join;
5+ use std:: { convert:: TryInto , future:: Future , num:: NonZeroU64 , time:: Duration } ;
6+
7+ fn block_on < T > ( future : impl Future < Output = T > ) -> T {
278 cfg_if:: cfg_if! {
289 if #[ cfg( target_arch = "wasm32" ) ] {
29- wasm_bindgen_futures:: spawn_local( create_device_queue_async ( ) )
10+ wasm_bindgen_futures:: spawn_local( future )
3011 } else {
31- futures:: executor:: block_on( create_device_queue_async ( ) )
12+ futures:: executor:: block_on( future )
3213 }
3314 }
3415}
3516
3617pub fn start ( options : & Options ) {
3718 let shader_binary = shader_module ( options. shader ) ;
3819
39- let ( device, queue) = create_device_queue ( ) ;
20+ block_on ( start_internal ( options, shader_binary) )
21+ }
22+
/// Creates a headless wgpu device, runs the `main_cs` compute entry point over the integers
/// `1..2^20`, copies the results back to the CPU and prints them together with the GPU time
/// measured by two timestamp queries.
///
/// * `_options` is currently unused.
/// * `shader_binary` is the descriptor passed to `create_shader_module`.
///
/// Panics (via `expect`) when no adapter or device can be obtained. If mapping either readback
/// buffer fails, nothing is printed and the function returns silently.
///
/// The bind-group-layout entries and the pipeline creation are not visible in this excerpt
/// (they fall between the diff hunks), so they are left undocumented here.
23+ pub async fn start_internal (
24+ _options : & Options ,
25+ shader_binary : wgpu:: ShaderModuleDescriptor < ' static > ,
26+ ) {
27+ let instance = wgpu:: Instance :: new ( wgpu:: BackendBit :: PRIMARY ) ;
// No surface is requested: the example only performs compute work.
28+ let adapter = instance
29+ . request_adapter ( & wgpu:: RequestAdapterOptions {
30+ power_preference : wgpu:: PowerPreference :: default ( ) ,
31+ compatible_surface : None ,
32+ } )
33+ . await
34+ . expect ( "Failed to find an appropriate adapter" ) ;
4035
// Factor used at the end of the function to scale the raw timestamp difference to nanoseconds.
36+ let timestamp_period = adapter. get_timestamp_period ( ) ;
// TIMESTAMP_QUERY is requested because the compute pass below calls `write_timestamp`.
37+ let ( device, queue) = adapter
38+ . request_device (
39+ & wgpu:: DeviceDescriptor {
40+ label : None ,
41+ features : wgpu:: Features :: TIMESTAMP_QUERY ,
42+ limits : wgpu:: Limits :: default ( ) ,
43+ } ,
44+ None ,
45+ )
46+ . await
47+ . expect ( "Failed to create device" ) ;
48+ drop ( instance) ;
49+ drop ( adapter) ;
4150 // Load the shaders from disk
4251 let module = device. create_shader_module ( & shader_binary) ;
4352
// Input values are 1, 2, ..., 2^20 - 1 (the range end is exclusive).
53+ let top = 2u32 . pow ( 20 ) ;
54+ let src_range = 1 ..top;
55+
// Flatten every u32 into its 4 native-endian bytes; the readback below decodes with
// `from_ne_bytes`, so the CPU-side encoding and decoding are symmetric.
56+ let src = src_range
57+ . clone ( )
58+ // Not sure which endianness is correct to use here
59+ . map ( u32:: to_ne_bytes)
60+ . flat_map ( core:: array:: IntoIter :: new)
61+ . collect :: < Vec < _ > > ( ) ;
62+
4463 let bind_group_layout = device. create_bind_group_layout ( & wgpu:: BindGroupLayoutDescriptor {
4564 label : None ,
4665 entries : & [
@@ -72,10 +91,26 @@ pub fn start(options: &Options) {
7291 entry_point : "main_cs" ,
7392 } ) ;
7493
// CPU-mappable destination for the shader output; same byte length as the input.
75- let buf = device. create_buffer ( & wgpu:: BufferDescriptor {
94+ let readback_buffer = device. create_buffer ( & wgpu:: BufferDescriptor {
7695 label : None ,
77- size : 1 ,
78- usage : wgpu:: BufferUsage :: STORAGE ,
96+ size : src. len ( ) as wgpu:: BufferAddress ,
97+ // Can be read to the CPU, and can be copied from the shader's storage buffer
98+ usage : wgpu:: BufferUsage :: MAP_READ | wgpu:: BufferUsage :: COPY_DST ,
99+ mapped_at_creation : false ,
100+ } ) ;
101+
// Buffer bound to the shader, pre-filled with `src`; COPY_SRC allows the later copy into
// `readback_buffer`.
102+ let storage_buffer = device. create_buffer_init ( & wgpu:: util:: BufferInitDescriptor {
103+ label : Some ( "Collatz Conjecture Input" ) ,
104+ contents : & src,
105+ usage : wgpu:: BufferUsage :: STORAGE
106+ | wgpu:: BufferUsage :: COPY_DST
107+ | wgpu:: BufferUsage :: COPY_SRC ,
108+ } ) ;
109+
// 16 bytes = two u64 values, one per timestamp query resolved below.
110+ let timestamp_buffer = device. create_buffer ( & wgpu:: BufferDescriptor {
111+ label : Some ( "Timestamps buffer" ) ,
112+ size : 16 ,
113+ usage : wgpu:: BufferUsage :: MAP_READ | wgpu:: BufferUsage :: COPY_DST ,
79114 mapped_at_creation : false ,
80115 } ) ;
81116
@@ -84,23 +119,74 @@ pub fn start(options: &Options) {
84119 layout : & bind_group_layout,
85120 entries : & [ wgpu:: BindGroupEntry {
86121 binding : 0 ,
87- resource : wgpu:: BindingResource :: Buffer {
88- buffer : & buf,
89- offset : 0 ,
90- size : None ,
91- } ,
122+ resource : storage_buffer. as_entire_binding ( ) ,
92123 } ] ,
93124 } ) ;
94125
// Two timestamp slots: index 0 is written before the dispatch, index 1 after it.
126+ let queries = device. create_query_set ( & wgpu:: QuerySetDescriptor {
127+ count : 2 ,
128+ ty : wgpu:: QueryType :: Timestamp ,
129+ } ) ;
130+
95131 let mut encoder =
96132 device. create_command_encoder ( & wgpu:: CommandEncoderDescriptor { label : None } ) ;
97133
98134 {
99135 let mut cpass = encoder. begin_compute_pass ( & wgpu:: ComputePassDescriptor { label : None } ) ;
100136 cpass. set_bind_group ( 0 , & bind_group, & [ ] ) ;
101137 cpass. set_pipeline ( & compute_pipeline) ;
102- cpass. dispatch ( 1 , 1 , 1 ) ;
138+ cpass. write_timestamp ( & queries, 0 ) ;
// `src_range.len()` is 2^20 - 1, so this integer division yields 16383 workgroups.
// NOTE(review): if the shader's workgroup size is 64 (not visible here), the trailing 63
// inputs are never processed — confirm whether the count should be rounded up instead.
139+ cpass. dispatch ( src_range. len ( ) as u32 / 64 , 1 , 1 ) ;
140+ cpass. write_timestamp ( & queries, 1 ) ;
103141 }
104142
// After the pass: copy the shader output into the mappable buffer and resolve both timestamps.
143+ encoder. copy_buffer_to_buffer (
144+ & storage_buffer,
145+ 0 ,
146+ & readback_buffer,
147+ 0 ,
148+ src. len ( ) as wgpu:: BufferAddress ,
149+ ) ;
150+ encoder. resolve_query_set ( & queries, 0 ..2 , & timestamp_buffer, 0 ) ;
151+
105152 queue. submit ( Some ( encoder. finish ( ) ) ) ;
153+ let buffer_slice = readback_buffer. slice ( ..) ;
154+ let timestamp_slice = timestamp_buffer. slice ( ..) ;
155+ let timestamp_future = timestamp_slice. map_async ( wgpu:: MapMode :: Read ) ;
156+ let buffer_future = buffer_slice. map_async ( wgpu:: MapMode :: Read ) ;
// Drive the device so the pending map requests can complete before they are awaited.
157+ device. poll ( wgpu:: Maintain :: Wait ) ;
158+
// Only proceeds when both mappings succeeded; there is no `else`, so a mapping error is
// silently ignored.
159+ if let ( Ok ( ( ) ) , Ok ( ( ) ) ) = join ( buffer_future, timestamp_future) . await {
160+ let data = buffer_slice. get_mapped_range ( ) ;
161+ let timing_data = timestamp_slice. get_mapped_range ( ) ;
// Decode the raw bytes into owned vectors so the mapped views can be released right away.
162+ let result = data
163+ . chunks_exact ( 4 )
164+ . map ( |b| u32:: from_ne_bytes ( b. try_into ( ) . unwrap ( ) ) )
165+ . collect :: < Vec < _ > > ( ) ;
166+ let timings = timing_data
167+ . chunks_exact ( 8 )
168+ . map ( |b| u64:: from_ne_bytes ( b. try_into ( ) . unwrap ( ) ) )
169+ . collect :: < Vec < _ > > ( ) ;
// Each mapped view is dropped before its buffer is unmapped.
170+ drop ( data) ;
171+ readback_buffer. unmap ( ) ;
172+ drop ( timing_data) ;
173+ timestamp_buffer. unmap ( ) ;
// Print every input whose output exceeds all previous outputs; stop at the first output equal
// to `u32::MAX`, which is reported as an overflow.
174+ let mut max = 0 ;
175+ for ( src, out) in src_range. zip ( result. iter ( ) . copied ( ) ) {
176+ if out == u32:: MAX {
177+ println ! ( "{}: overflowed" , src) ;
178+ break ;
179+ } else if out > max {
180+ max = out;
181+ // Should produce <https://oeis.org/A006877>
182+ println ! ( "{}: {}" , src, out) ;
183+ }
184+ }
// Elapsed GPU time = (end - start) ticks scaled by `timestamp_period`, treated as nanoseconds.
// NOTE(review): the u64 subtraction is unchecked; it would overflow if the second timestamp
// were ever smaller than the first — presumably cannot happen, confirm.
185+ println ! (
186+ "Took: {:?}" ,
187+ Duration :: from_nanos(
188+ ( ( timings[ 1 ] - timings[ 0 ] ) as f64 * f64 :: from( timestamp_period) ) as u64
189+ )
190+ ) ;
191+ }
106192}
0 commit comments