@@ -312,6 +312,18 @@ pub fn fft_32_dit_with_planner_and_opts(
312312 imags : & mut [ f32 ] ,
313313 planner : & PlannerDit32 ,
314314 opts : & Options ,
315+ ) {
316+ // Dynamic dispatch overhead becomes really noticeable at small FFT sizes.
317+ // Dispatch only once at the top of the FFT entry point to avoid paying that cost again for every kernel call.
318+ dispatch ! ( planner. simd_level, simd => fft_32_dit_with_planner_and_opts_impl( simd, reals, imags, planner, opts) )
319+ }
320+
321+ fn fft_32_dit_with_planner_and_opts_impl < S : Simd > (
322+ simd : S ,
323+ reals : & mut [ f32 ] ,
324+ imags : & mut [ f32 ] ,
325+ planner : & PlannerDit32 ,
326+ opts : & Options ,
315327) {
316328 assert_eq ! ( reals. len( ) , imags. len( ) ) ;
317329 assert ! ( reals. len( ) . is_power_of_two( ) ) ;
@@ -320,13 +332,21 @@ pub fn fft_32_dit_with_planner_and_opts(
320332 let log_n = n. ilog2 ( ) as usize ;
321333 assert_eq ! ( log_n, planner. log_n) ;
322334
323- let simd_level = planner. simd_level ;
324-
325335 // DIT requires bit-reversed input
326336 run_maybe_in_parallel (
327337 opts. multithreaded_bit_reversal ,
328- || dispatch ! ( simd_level, simd => bit_rev_bravo_f32( simd, reals, log_n) ) ,
329- || dispatch ! ( simd_level, simd => bit_rev_bravo_f32( simd, imags, log_n) ) ,
338+ || {
339+ simd. vectorize (
340+ #[ inline( always) ]
341+ || bit_rev_bravo_f32 ( simd, reals, log_n) ,
342+ )
343+ } ,
344+ || {
345+ simd. vectorize (
346+ #[ inline( always) ]
347+ || bit_rev_bravo_f32 ( simd, imags, log_n) ,
348+ )
349+ } ,
330350 ) ;
331351
332352 // Handle inverse FFT
@@ -336,7 +356,10 @@ pub fn fft_32_dit_with_planner_and_opts(
336356 }
337357 }
338358
339- dispatch ! ( simd_level, simd => recursive_dit_fft_f32( simd, reals, imags, n, planner, opts, 0 ) ) ;
359+ simd. vectorize (
360+ #[ inline( always) ]
361+ || recursive_dit_fft_f32 ( simd, reals, imags, n, planner, opts, 0 ) ,
362+ ) ;
340363
341364 // Scaling for inverse transform
342365 if let Direction :: Reverse = planner. direction {
0 commit comments