1
+ use wgpu:: util:: DeviceExt ;
2
+
1
3
use super :: { shader_module, Options } ;
2
- use core:: num:: NonZeroU64 ;
3
-
4
- fn create_device_queue ( ) -> ( wgpu:: Device , wgpu:: Queue ) {
5
- async fn create_device_queue_async ( ) -> ( wgpu:: Device , wgpu:: Queue ) {
6
- let instance = wgpu:: Instance :: new ( wgpu:: BackendBit :: PRIMARY ) ;
7
- let adapter = instance
8
- . request_adapter ( & wgpu:: RequestAdapterOptions {
9
- power_preference : wgpu:: PowerPreference :: default ( ) ,
10
- compatible_surface : None ,
11
- } )
12
- . await
13
- . expect ( "Failed to find an appropriate adapter" ) ;
14
-
15
- adapter
16
- . request_device (
17
- & wgpu:: DeviceDescriptor {
18
- label : None ,
19
- features : wgpu:: Features :: empty ( ) ,
20
- limits : wgpu:: Limits :: default ( ) ,
21
- } ,
22
- None ,
23
- )
24
- . await
25
- . expect ( "Failed to create device" )
26
- }
4
+ use futures:: future:: join;
5
+ use std:: { convert:: TryInto , future:: Future , num:: NonZeroU64 , time:: Duration } ;
6
+
7
+ fn block_on < T > ( future : impl Future < Output = T > ) -> T {
27
8
cfg_if:: cfg_if! {
28
9
if #[ cfg( target_arch = "wasm32" ) ] {
29
- wasm_bindgen_futures:: spawn_local( create_device_queue_async ( ) )
10
+ wasm_bindgen_futures:: spawn_local( future )
30
11
} else {
31
- futures:: executor:: block_on( create_device_queue_async ( ) )
12
+ futures:: executor:: block_on( future )
32
13
}
33
14
}
34
15
}
35
16
36
17
pub fn start ( options : & Options ) {
37
18
let shader_binary = shader_module ( options. shader ) ;
38
19
39
- let ( device, queue) = create_device_queue ( ) ;
20
+ block_on ( start_internal ( options, shader_binary) )
21
+ }
22
+
23
+ pub async fn start_internal (
24
+ _options : & Options ,
25
+ shader_binary : wgpu:: ShaderModuleDescriptor < ' static > ,
26
+ ) {
27
+ let instance = wgpu:: Instance :: new ( wgpu:: BackendBit :: PRIMARY ) ;
28
+ let adapter = instance
29
+ . request_adapter ( & wgpu:: RequestAdapterOptions {
30
+ power_preference : wgpu:: PowerPreference :: default ( ) ,
31
+ compatible_surface : None ,
32
+ } )
33
+ . await
34
+ . expect ( "Failed to find an appropriate adapter" ) ;
40
35
36
+ let timestamp_period = adapter. get_timestamp_period ( ) ;
37
+ let ( device, queue) = adapter
38
+ . request_device (
39
+ & wgpu:: DeviceDescriptor {
40
+ label : None ,
41
+ features : wgpu:: Features :: TIMESTAMP_QUERY ,
42
+ limits : wgpu:: Limits :: default ( ) ,
43
+ } ,
44
+ None ,
45
+ )
46
+ . await
47
+ . expect ( "Failed to create device" ) ;
48
+ drop ( instance) ;
49
+ drop ( adapter) ;
41
50
// Load the shaders from disk
42
51
let module = device. create_shader_module ( & shader_binary) ;
43
52
53
+ let top = 2u32 . pow ( 20 ) ;
54
+ let src_range = 1 ..top;
55
+
56
+ let src = src_range
57
+ . clone ( )
58
+ // Not sure which endianness is correct to use here
59
+ . map ( u32:: to_ne_bytes)
60
+ . flat_map ( core:: array:: IntoIter :: new)
61
+ . collect :: < Vec < _ > > ( ) ;
62
+
44
63
let bind_group_layout = device. create_bind_group_layout ( & wgpu:: BindGroupLayoutDescriptor {
45
64
label : None ,
46
65
entries : & [
@@ -72,10 +91,26 @@ pub fn start(options: &Options) {
72
91
entry_point : "main_cs" ,
73
92
} ) ;
74
93
75
- let buf = device. create_buffer ( & wgpu:: BufferDescriptor {
94
+ let readback_buffer = device. create_buffer ( & wgpu:: BufferDescriptor {
76
95
label : None ,
77
- size : 1 ,
78
- usage : wgpu:: BufferUsage :: STORAGE ,
96
+ size : src. len ( ) as wgpu:: BufferAddress ,
97
+ // Can be read to the CPU, and can be copied from the shader's storage buffer
98
+ usage : wgpu:: BufferUsage :: MAP_READ | wgpu:: BufferUsage :: COPY_DST ,
99
+ mapped_at_creation : false ,
100
+ } ) ;
101
+
102
+ let storage_buffer = device. create_buffer_init ( & wgpu:: util:: BufferInitDescriptor {
103
+ label : Some ( "Collatz Conjecture Input" ) ,
104
+ contents : & src,
105
+ usage : wgpu:: BufferUsage :: STORAGE
106
+ | wgpu:: BufferUsage :: COPY_DST
107
+ | wgpu:: BufferUsage :: COPY_SRC ,
108
+ } ) ;
109
+
110
+ let timestamp_buffer = device. create_buffer ( & wgpu:: BufferDescriptor {
111
+ label : Some ( "Timestamps buffer" ) ,
112
+ size : 16 ,
113
+ usage : wgpu:: BufferUsage :: MAP_READ | wgpu:: BufferUsage :: COPY_DST ,
79
114
mapped_at_creation : false ,
80
115
} ) ;
81
116
@@ -84,23 +119,74 @@ pub fn start(options: &Options) {
84
119
layout : & bind_group_layout,
85
120
entries : & [ wgpu:: BindGroupEntry {
86
121
binding : 0 ,
87
- resource : wgpu:: BindingResource :: Buffer {
88
- buffer : & buf,
89
- offset : 0 ,
90
- size : None ,
91
- } ,
122
+ resource : storage_buffer. as_entire_binding ( ) ,
92
123
} ] ,
93
124
} ) ;
94
125
126
+ let queries = device. create_query_set ( & wgpu:: QuerySetDescriptor {
127
+ count : 2 ,
128
+ ty : wgpu:: QueryType :: Timestamp ,
129
+ } ) ;
130
+
95
131
let mut encoder =
96
132
device. create_command_encoder ( & wgpu:: CommandEncoderDescriptor { label : None } ) ;
97
133
98
134
{
99
135
let mut cpass = encoder. begin_compute_pass ( & wgpu:: ComputePassDescriptor { label : None } ) ;
100
136
cpass. set_bind_group ( 0 , & bind_group, & [ ] ) ;
101
137
cpass. set_pipeline ( & compute_pipeline) ;
102
- cpass. dispatch ( 1 , 1 , 1 ) ;
138
+ cpass. write_timestamp ( & queries, 0 ) ;
139
+ cpass. dispatch ( src_range. len ( ) as u32 / 64 , 1 , 1 ) ;
140
+ cpass. write_timestamp ( & queries, 1 ) ;
103
141
}
104
142
143
+ encoder. copy_buffer_to_buffer (
144
+ & storage_buffer,
145
+ 0 ,
146
+ & readback_buffer,
147
+ 0 ,
148
+ src. len ( ) as wgpu:: BufferAddress ,
149
+ ) ;
150
+ encoder. resolve_query_set ( & queries, 0 ..2 , & timestamp_buffer, 0 ) ;
151
+
105
152
queue. submit ( Some ( encoder. finish ( ) ) ) ;
153
+ let buffer_slice = readback_buffer. slice ( ..) ;
154
+ let timestamp_slice = timestamp_buffer. slice ( ..) ;
155
+ let timestamp_future = timestamp_slice. map_async ( wgpu:: MapMode :: Read ) ;
156
+ let buffer_future = buffer_slice. map_async ( wgpu:: MapMode :: Read ) ;
157
+ device. poll ( wgpu:: Maintain :: Wait ) ;
158
+
159
+ if let ( Ok ( ( ) ) , Ok ( ( ) ) ) = join ( buffer_future, timestamp_future) . await {
160
+ let data = buffer_slice. get_mapped_range ( ) ;
161
+ let timing_data = timestamp_slice. get_mapped_range ( ) ;
162
+ let result = data
163
+ . chunks_exact ( 4 )
164
+ . map ( |b| u32:: from_ne_bytes ( b. try_into ( ) . unwrap ( ) ) )
165
+ . collect :: < Vec < _ > > ( ) ;
166
+ let timings = timing_data
167
+ . chunks_exact ( 8 )
168
+ . map ( |b| u64:: from_ne_bytes ( b. try_into ( ) . unwrap ( ) ) )
169
+ . collect :: < Vec < _ > > ( ) ;
170
+ drop ( data) ;
171
+ readback_buffer. unmap ( ) ;
172
+ drop ( timing_data) ;
173
+ timestamp_buffer. unmap ( ) ;
174
+ let mut max = 0 ;
175
+ for ( src, out) in src_range. zip ( result. iter ( ) . copied ( ) ) {
176
+ if out == u32:: MAX {
177
+ println ! ( "{}: overflowed" , src) ;
178
+ break ;
179
+ } else if out > max {
180
+ max = out;
181
+ // Should produce <https://oeis.org/A006877>
182
+ println ! ( "{}: {}" , src, out) ;
183
+ }
184
+ }
185
+ println ! (
186
+ "Took: {:?}" ,
187
+ Duration :: from_nanos(
188
+ ( ( timings[ 1 ] - timings[ 0 ] ) as f64 * f64 :: from( timestamp_period) ) as u64
189
+ )
190
+ ) ;
191
+ }
106
192
}
0 commit comments