Skip to content

Commit 34de4f2

Browse files
committed
Refactor the public API of chunk_2 DiT kernel in preparation for conversion to explicit SIMD
1 parent 964dd4c commit 34de4f2

File tree

2 files changed

+31
-3
lines changed

2 files changed

+31
-3
lines changed

src/algorithms/dit.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,7 @@ fn execute_dit_stage_f64<S: Simd>(
156156
let chunk_size = dist * 2;
157157

158158
if chunk_size == 2 {
159-
simd.vectorize(|| fft_dit_chunk_2(simd, reals, imags));
159+
simd.vectorize(|| fft_dit_chunk_2_f64(simd, reals, imags));
160160
stage_twiddle_idx
161161
} else if chunk_size == 4 {
162162
fft_dit_chunk_4_f64(simd, reals, imags);
@@ -195,7 +195,7 @@ fn execute_dit_stage_f32<S: Simd>(
195195
let chunk_size = dist * 2;
196196

197197
if chunk_size == 2 {
198-
simd.vectorize(|| fft_dit_chunk_2(simd, reals, imags));
198+
simd.vectorize(|| fft_dit_chunk_2_f32(simd, reals, imags));
199199
stage_twiddle_idx
200200
} else if chunk_size == 4 {
201201
fft_dit_chunk_4_f32(simd, reals, imags);

src/kernels/dit.rs

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,38 @@ use core::f32;
77
use fearless_simd::{f32x16, f32x4, f32x8, f64x4, f64x8, Simd, SimdBase, SimdFloat, SimdFrom};
88
use num_traits::Float;
99

10+
/// DIT butterfly for chunk_size == 2 (f64)
///
/// Public, non-inlined entry point for `f64` data. It calls `simd.vectorize` with a
/// closure that forwards `simd`, `reals` and `imags` unchanged to
/// [`fft_dit_chunk_2_simd_f64`]; this wrapper itself does not read or write any
/// element of the two slices.
///
/// * `simd`  - the SIMD token/level `S` used for `vectorize` and passed on to the kernel.
/// * `reals` - mutable slice handed to the kernel (presumably the real parts; confirm against caller).
/// * `imags` - mutable slice handed to the kernel (presumably the imaginary parts; confirm against caller).
//
// NOTE(review): the `chunk_size == 2` call site in `execute_dit_stage_f64` still wraps
// this function in its own `simd.vectorize(|| ...)`, whereas the `chunk_size == 4`
// kernel is called directly. Because this function is `#[inline(never)]` and already
// calls `vectorize` internally, the outer wrapper looks redundant - confirm and
// consider calling this function directly from the stage dispatcher.
11+
#[inline(never)] // otherwise every kernel gets inlined into the parent and ARM perf drops due to register pressure
12+
pub fn fft_dit_chunk_2_f64<S: Simd>(simd: S, reals: &mut [f64], imags: &mut [f64]) {
13+
simd.vectorize(
14+
#[inline(always)]
15+
|| fft_dit_chunk_2_simd_f64(simd, reals, imags),
16+
)
17+
}
18+
19+
/// DIT butterfly for chunk_size == 2 (f32)
///
/// Public, non-inlined entry point for `f32` data. It calls `simd.vectorize` with a
/// closure that forwards `simd`, `reals` and `imags` unchanged to
/// [`fft_dit_chunk_2_simd_f32`]; this wrapper itself does not read or write any
/// element of the two slices.
///
/// * `simd`  - the SIMD token/level `S` used for `vectorize` and passed on to the kernel.
/// * `reals` - mutable slice handed to the kernel (presumably the real parts; confirm against caller).
/// * `imags` - mutable slice handed to the kernel (presumably the imaginary parts; confirm against caller).
//
// NOTE(review): the `chunk_size == 2` call site in `execute_dit_stage_f32` still wraps
// this function in its own `simd.vectorize(|| ...)`, whereas the `chunk_size == 4`
// kernel is called directly. Because this function is `#[inline(never)]` and already
// calls `vectorize` internally, the outer wrapper looks redundant - confirm and
// consider calling this function directly from the stage dispatcher.
20+
#[inline(never)] // otherwise every kernel gets inlined into the parent and ARM perf drops due to register pressure
21+
pub fn fft_dit_chunk_2_f32<S: Simd>(simd: S, reals: &mut [f32], imags: &mut [f32]) {
22+
simd.vectorize(
23+
#[inline(always)]
24+
|| fft_dit_chunk_2_simd_f32(simd, reals, imags),
25+
)
26+
}
27+
28+
/// `f32` kernel body for the chunk_size == 2 DIT butterfly.
///
/// Forwards `simd`, `reals` and `imags` unchanged to the generic `fft_dit_chunk_2`,
/// instantiated with `T = f32`. It is the function invoked from inside the
/// `simd.vectorize` closure of [`fft_dit_chunk_2_f32`].
///
/// NOTE(review): per the commit message this type-specific layer exists "in preparation
/// for conversion to explicit SIMD"; presumably the explicit-SIMD implementation will
/// replace the generic call here - confirm with the follow-up change.
#[inline(always)] // required by fearless_simd
29+
pub fn fft_dit_chunk_2_simd_f32<S: Simd>(simd: S, reals: &mut [f32], imags: &mut [f32]) {
30+
fft_dit_chunk_2(simd, reals, imags)
31+
}
32+
33+
/// `f64` kernel body for the chunk_size == 2 DIT butterfly.
///
/// Forwards `simd`, `reals` and `imags` unchanged to the generic `fft_dit_chunk_2`,
/// instantiated with `T = f64`. It is the function invoked from inside the
/// `simd.vectorize` closure of [`fft_dit_chunk_2_f64`].
///
/// NOTE(review): per the commit message this type-specific layer exists "in preparation
/// for conversion to explicit SIMD"; presumably the explicit-SIMD implementation will
/// replace the generic call here - confirm with the follow-up change.
#[inline(always)] // required by fearless_simd
34+
pub fn fft_dit_chunk_2_simd_f64<S: Simd>(simd: S, reals: &mut [f64], imags: &mut [f64]) {
35+
fft_dit_chunk_2(simd, reals, imags)
36+
}
37+
1038
/// DIT butterfly for chunk_size == 2
1139
/// Identical to DIF version (no twiddles at size 2)
1240
#[inline(always)] // required by fearless_simd
13-
pub fn fft_dit_chunk_2<S: Simd, T: Float>(_simd: S, reals: &mut [T], imags: &mut [T]) {
41+
fn fft_dit_chunk_2<S: Simd, T: Float>(_simd: S, reals: &mut [T], imags: &mut [T]) {
1442
reals
1543
.chunks_exact_mut(2)
1644
.zip(imags.chunks_exact_mut(2))

0 commit comments

Comments
 (0)