Skip to content

Commit 6b8fb54

Browse files
ENH: Perf. improv. for ND fft on real arrays
Intel (R) MKL produces only half of the harmonics for real input arrays, and copying is done in mkl_fft using multi-iterators. The multi-iterator used to reside in a separate compilation unit, which prevented some compilers from inlining its calls. The implementation was moved from the .c file into a header. Additionally, the multi-iterator structs are no longer allocated on the heap.
1 parent 65c13e4 commit 6b8fb54

File tree

6 files changed

+193
-216
lines changed

6 files changed

+193
-216
lines changed

CHANGES.rst

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,31 @@
22
mkl_fft changelog
33
=================
44

5+
1.0.11
6+
======
7+
Improved performance of ND FFT on real input arrays by inlining multi-iterators.
8+
This particularly benefits performance of mkl_fft built with the Intel (R) C Compiler.
9+
10+
511
1.0.10
6-
====
12+
======
713
Fix for issue #29.
814

915

1016
1.0.7
11-
====
17+
=====
1218
Improved exception message raised if MKL is signalling an error. The message now includes MKL's own description of the exception.
1319
This partially improves #24.
1420

1521
Improved argument validation for ND transforms aligning with scipy 1.2.0
1622

1723
1.0.6
18-
====
24+
=====
1925

2026
Fixed issue #21 and addressed NumPy 1.15 deprecation warnings from using lists instead of tuples to specify multiple slices.
2127

2228
1.0.5
23-
====
29+
=====
2430

2531
Fixed issues #7, #17, #18.
2632
Consolidated version specification into a single file `mkl_fft/_version.py`.
@@ -31,7 +37,7 @@ Consolidated version specification into a single file `mkl_fft/_version.py`.
3137
Added CHANGES.rst. Fixed issue #11 by using lock around calls to 1D FFT routines.
3238

3339
1.0.3
34-
====
40+
=====
3541

3642
This is a bug fix release.
3743

@@ -41,17 +47,17 @@ As part of fixing issue #13, out-of-place 1D FFT calls such as `fft`, `ifft`, `r
4147

4248

4349
1.0.2
44-
====
50+
=====
4551

4652
Minor update of `mkl_fft`, reflecting renaming of `numpy.core.multiarray_tests` module to `numpy.core._multiarray_tests` as well as fixing #4.
4753

4854

4955
1.0.1
50-
====
56+
=====
5157

5258
Bug fix release.
5359

5460
1.0.0
55-
====
61+
=====
5662

5763
Initial release of `mkl_fft`.

mkl_fft/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.0.10'
1+
__version__ = '1.0.11'

mkl_fft/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def configuration(parent_package='',top_path=None):
5353
name = '_pydfti',
5454
sources = [
5555
join(wdir, 'mklfft.c.src'),
56-
join(wdir, 'multi_iter.c'),
56+
# join(wdir, 'multi_iter.c'),
5757
] + sources,
5858
depends = [
5959
join(wdir, 'mklfft.h'),
@@ -63,7 +63,7 @@ def configuration(parent_package='',top_path=None):
6363
libraries = libs,
6464
extra_compile_args = [
6565
'-DNDEBUG',
66-
# '-g', '-O0', '-Wall', '-Wextra', '-DDEBUG',
66+
# '-ggdb', '-O0', '-Wall', '-Wextra', '-DDEBUG',
6767
]
6868
)
6969

mkl_fft/src/mklfft.c.src

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ int @name@_mkl_@mode@_in(PyArrayObject* x_inout, npy_intp n, int axis)
546546
status = __cached_inplace_@DftiCompute_MODE@_@MKL_TYPE@(x_data);
547547
if (status != 0) goto failed;
548548
} else {
549-
multi_iter_masked_t *mit;
549+
multi_iter_masked_t mit;
550550
int *mask;
551551
int i;
552552

@@ -555,7 +555,7 @@ int @name@_mkl_@mode@_in(PyArrayObject* x_inout, npy_intp n, int axis)
555555
for(i = 0; i < axis; i++) mask[i] = i;
556556
for(i = axis + 1; i < x_rank; i++) mask[i-1] = i;
557557

558-
mit = multi_iter_masked_new(x_shape, x_rank, mask, x_rank - 1);
558+
multi_iter_masked_new(&mit, x_shape, x_rank, mask, x_rank - 1);
559559

560560
while(!MultiIter_Done(mit)) {
561561
char *tmp;
@@ -567,13 +567,13 @@ int @name@_mkl_@mode@_in(PyArrayObject* x_inout, npy_intp n, int axis)
567567
(@MKL_TYPE@*) tmp);
568568
if (status != 0) break;
569569

570-
if (multi_iter_masked_next(mit))
570+
if (multi_iter_masked_next(&mit))
571571
break;
572572
}
573573

574574

575-
multi_iter_masked_free(mit);
576-
mkl_free(mask);
575+
multi_iter_masked_free(&mit);
576+
mkl_free(mask);
577577

578578
if (status != 0) goto failed;
579579
}
@@ -733,7 +733,7 @@ int @REALIN@_@COMPLEXOUT@_mkl_@mode@_out(
733733
xin_data, xout_data);
734734
if (status != 0) goto failed;
735735
} else {
736-
multi_iter_masked_t *mit;
736+
multi_iter_masked_t mit;
737737
int *mask;
738738
int i;
739739

@@ -742,7 +742,7 @@ int @REALIN@_@COMPLEXOUT@_mkl_@mode@_out(
742742
for(i = 0; i < axis; i++) mask[i] = i;
743743
for(i = axis + 1; i < xin_rank; i++) mask[i-1] = i;
744744

745-
mit = multi_iter_masked_new(xin_shape, xin_rank, mask, xin_rank - 1);
745+
multi_iter_masked_new(&mit, xin_shape, xin_rank, mask, xin_rank - 1);
746746

747747
while(!MultiIter_Done(mit)) {
748748
char *tmp1, *tmp2;
@@ -758,12 +758,12 @@ int @REALIN@_@COMPLEXOUT@_mkl_@mode@_out(
758758
(@MKL_IN_TYPE@*) tmp1, (@MKL_OUT_TYPE@*) tmp2 );
759759
if (status != 0) break;
760760

761-
if (multi_iter_masked_next(mit))
761+
if (multi_iter_masked_next(&mit))
762762
break;
763763
}
764764

765765

766-
multi_iter_masked_free(mit);
766+
multi_iter_masked_free(&mit);
767767
mkl_free(mask);
768768

769769
if (status != 0) goto failed;
@@ -792,15 +792,15 @@ int @REALIN@_@COMPLEXOUT@_mkl_@mode@_out(
792792
} else if(single_DftiCompute) {
793793
*/
794794
} else {
795-
multi_iter_t *mit;
795+
multi_iter_t mit;
796796
npy_intp *half_shape;
797797

798798
half_shape = (npy_intp *) mkl_malloc(xout_rank * sizeof(npy_intp), 64);
799799

800800
memcpy(half_shape, xout_shape, xout_rank * sizeof(npy_intp));
801801
half_shape[axis] = (n_last > 2) ? n_last - nh_last: 0;
802802

803-
mit = multi_iter_new(half_shape, xout_rank);
803+
multi_iter_new(&mit, half_shape, xout_rank);
804804

805805
while(!MultiIter_Done(mit)) {
806806
char *tmp1, *tmp2;
@@ -831,12 +831,12 @@ int @REALIN@_@COMPLEXOUT@_mkl_@mode@_out(
831831

832832
SET_CONJ(dest, src);
833833

834-
if (multi_iter_next(mit))
834+
if (multi_iter_next(&mit))
835835
break;
836836
}
837837

838838

839-
multi_iter_free(mit);
839+
multi_iter_free(&mit);
840840
mkl_free(half_shape);
841841
}
842842
}
@@ -983,7 +983,7 @@ int @COMPLEXIN@_@COMPLEXOUT@_mkl_@mode@_out(
983983
xin_data, xout_data);
984984
if (status != 0) goto failed;
985985
} else {
986-
multi_iter_masked_t *mit;
986+
multi_iter_masked_t mit;
987987
int *mask;
988988
int i;
989989

@@ -992,7 +992,7 @@ int @COMPLEXIN@_@COMPLEXOUT@_mkl_@mode@_out(
992992
for(i = 0; i < axis; i++) mask[i] = i;
993993
for(i = axis + 1; i < xin_rank; i++) mask[i-1] = i;
994994

995-
mit = multi_iter_masked_new(xin_shape, xin_rank, mask, xin_rank - 1);
995+
multi_iter_masked_new(&mit, xin_shape, xin_rank, mask, xin_rank - 1);
996996

997997
while(!MultiIter_Done(mit)) {
998998
char *tmp1, *tmp2;
@@ -1008,12 +1008,12 @@ int @COMPLEXIN@_@COMPLEXOUT@_mkl_@mode@_out(
10081008
(@MKL_TYPE@*) tmp1, (@MKL_TYPE@*) tmp2);
10091009
if (status != 0) break;
10101010

1011-
if (multi_iter_masked_next(mit))
1011+
if (multi_iter_masked_next(&mit))
10121012
break;
10131013
}
10141014

10151015

1016-
multi_iter_masked_free(mit);
1016+
multi_iter_masked_free(&mit);
10171017
mkl_free(mask);
10181018

10191019
if (status != 0) goto failed;
@@ -1121,7 +1121,7 @@ int @name@_mkl_@mode@_in(PyArrayObject* x_inout, npy_intp n, int axis)
11211121
status = __cached_inplace_@DftiCompute_MODE@_@MKL_TYPE@(x_data);
11221122
if (status != 0) goto failed;
11231123
} else {
1124-
multi_iter_masked_t *mit;
1124+
multi_iter_masked_t mit;
11251125
int *mask;
11261126
int i;
11271127

@@ -1130,7 +1130,7 @@ int @name@_mkl_@mode@_in(PyArrayObject* x_inout, npy_intp n, int axis)
11301130
for(i = 0; i < axis; i++) mask[i] = i;
11311131
for(i = axis + 1; i < x_rank; i++) mask[i-1] = i;
11321132

1133-
mit = multi_iter_masked_new(x_shape, x_rank, mask, x_rank - 1);
1133+
multi_iter_masked_new(&mit, x_shape, x_rank, mask, x_rank - 1);
11341134

11351135
while(!MultiIter_Done(mit)) {
11361136
char *tmp;
@@ -1141,11 +1141,11 @@ int @name@_mkl_@mode@_in(PyArrayObject* x_inout, npy_intp n, int axis)
11411141
status = __cached_inplace_@DftiCompute_MODE@_@MKL_TYPE@((@MKL_TYPE@*) tmp);
11421142
if (status != 0) break;
11431143

1144-
if (multi_iter_masked_next(mit))
1144+
if (multi_iter_masked_next(&mit))
11451145
break;
11461146
}
11471147

1148-
multi_iter_masked_free(mit);
1148+
multi_iter_masked_free(&mit);
11491149
mkl_free(mask);
11501150

11511151
if (status != 0) goto failed;
@@ -1283,7 +1283,7 @@ int
12831283
xin_data, xout_data);
12841284
if (status != 0) goto failed;
12851285
} else {
1286-
multi_iter_masked_t *mit;
1286+
multi_iter_masked_t mit;
12871287
int *mask;
12881288
int i;
12891289

@@ -1292,7 +1292,7 @@ int
12921292
for(i = 0; i < axis; i++) mask[i] = i;
12931293
for(i = axis + 1; i < xin_rank; i++) mask[i-1] = i;
12941294

1295-
mit = multi_iter_masked_new(xin_shape, xin_rank, mask, xin_rank - 1);
1295+
multi_iter_masked_new(&mit, xin_shape, xin_rank, mask, xin_rank - 1);
12961296

12971297
while(!MultiIter_Done(mit)) {
12981298
char *tmp1, *tmp2;
@@ -1308,12 +1308,12 @@ int
13081308
(@MKL_IN_TYPE@*) tmp1, (@MKL_OUT_TYPE@*) tmp2);
13091309
if (status != 0) break;
13101310

1311-
if (multi_iter_masked_next(mit))
1311+
if (multi_iter_masked_next(&mit))
13121312
break;
13131313
}
13141314

13151315

1316-
multi_iter_masked_free(mit);
1316+
multi_iter_masked_free(&mit);
13171317
mkl_free(mask);
13181318

13191319
if (status != 0) goto failed;
@@ -1461,7 +1461,7 @@ int @name@_@name@_mkl_@mode@_out(
14611461
xin_data, xout_data);
14621462
if (status != 0) goto failed;
14631463
} else {
1464-
multi_iter_masked_t *mit;
1464+
multi_iter_masked_t mit;
14651465
int *mask;
14661466
int i;
14671467

@@ -1470,7 +1470,7 @@ int @name@_@name@_mkl_@mode@_out(
14701470
for(i = 0; i < axis; i++) mask[i] = i;
14711471
for(i = axis + 1; i < xin_rank; i++) mask[i-1] = i;
14721472

1473-
mit = multi_iter_masked_new(xin_shape, xin_rank, mask, xin_rank - 1);
1473+
multi_iter_masked_new(&mit, xin_shape, xin_rank, mask, xin_rank - 1);
14741474

14751475
while(!MultiIter_Done(mit)) {
14761476
char *tmp1, *tmp2;
@@ -1486,12 +1486,12 @@ int @name@_@name@_mkl_@mode@_out(
14861486
(@MKL_TYPE@*) tmp1, (@MKL_TYPE@*) tmp2);
14871487
if (status != 0) break;
14881488

1489-
if (multi_iter_masked_next(mit))
1489+
if (multi_iter_masked_next(&mit))
14901490
break;
14911491
}
14921492

14931493

1494-
multi_iter_masked_free(mit);
1494+
multi_iter_masked_free(&mit);
14951495
mkl_free(mask);
14961496

14971497
if (status != 0) goto failed;
@@ -1854,7 +1854,7 @@ int
18541854

18551855
/* copy conjugate even harmonics */
18561856
{
1857-
multi_iter_t *mit;
1857+
multi_iter_t mit;
18581858
npy_intp *half_shape;
18591859
npy_intp n_last, nh_last;
18601860
int i, last_idx = xout_rank - 1;
@@ -1866,7 +1866,7 @@ int
18661866
nh_last = (n_last/2) + 1;
18671867
half_shape[last_idx] = (n_last > 2) ? n_last - nh_last: 0;
18681868

1869-
mit = multi_iter_new(half_shape, xout_rank);
1869+
multi_iter_new(&mit, half_shape, xout_rank);
18701870

18711871
while(!MultiIter_Done(mit)) {
18721872
char *tmp1, *tmp2;
@@ -1897,11 +1897,14 @@ int
18971897
dest->real = src->real;
18981898
dest->imag = -src->imag;
18991899

1900-
if (multi_iter_next(mit))
1900+
if (multi_iter_next(&mit))
19011901
break;
19021902
}
1903-
}
19041903

1904+
multi_iter_free(&mit);
1905+
}
1906+
1907+
19051908
if (@POST_CONJUGATE@) {
19061909
Py_BEGIN_ALLOW_THREADS
19071910
@vml_conj_func@(xout_size, xout_data, xout_data, VML_HA);

0 commit comments

Comments
 (0)