diff --git a/doc/classes/SignalSmith.xml b/doc/classes/SignalSmith.xml
new file mode 100644
index 0000000000..352f02cfe0
--- /dev/null
+++ b/doc/classes/SignalSmith.xml
@@ -0,0 +1,60 @@
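+<?xml version="1.0" encoding="UTF-8" ?>
+<class name="SignalSmith" inherits="RefCounted" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
+	<brief_description>
+		Performs time-stretching and pitch-shifting on raw audio buffers using Signalsmith.
+	</brief_description>
+	<description>
+		SignalSmith is a low-level audio processing utility which wraps the Signalsmith time-stretching library. It operates on raw interleaved floating-point PCM audio buffers and allows independent control of playback tempo and pitch.
+	</description>
+	<tutorials>
+	</tutorials>
+	<methods>
+		<method name="process">
+			<return type="PackedFloat32Array" />
+			<param index="0" name="input" type="PackedFloat32Array" />
+			<description>
+				Processes a block of interleaved audio samples and returns a new buffer containing the time-stretched and pitch-shifted result.
+				The effective playback speed is determined by the ratio of input samples to output samples, as influenced by the current tempo setting. The [param input] buffer must contain 32-bit floating-point PCM data and be interleaved according to the configured channel count, so its size must be a multiple of that count.
+			</description>
+		</method>
+		<method name="reset">
+			<return type="void" />
+			<description>
+				Resets the internal processing state.
+				This should be called when restarting playback or discontinuously changing input streams.
+			</description>
+		</method>
+		<method name="set_channels">
+			<return type="void" />
+			<param index="0" name="channels" type="int" />
+			<description>
+				Sets the number of audio channels. Values less than [code]1[/code] are ignored.
+				Input and output buffers are expected to be interleaved according to this channel count.
+			</description>
+		</method>
+		<method name="set_pitch">
+			<return type="void" />
+			<param index="0" name="pitch" type="float" />
+			<description>
+				Sets the pitch transpose factor. Values that are not greater than [code]0.0[/code] are ignored.
+				A value of [code]1.0[/code] leaves pitch unchanged. Values greater than [code]1.0[/code] raise pitch, while values less than [code]1.0[/code] lower pitch.
+			</description>
+		</method>
+		<method name="set_sample_rate">
+			<return type="void" />
+			<param index="0" name="rate" type="int" />
+			<description>
+				Sets the sample rate, in Hz, used by the internal processing engine. Values less than [code]1[/code] are ignored.
+				Changing the sample rate resets the internal state.
+			</description>
+		</method>
+		<method name="set_tempo">
+			<return type="void" />
+			<param index="0" name="tempo" type="float" />
+			<description>
+				Sets the tempo multiplier used during processing. Values that are not greater than [code]0.0[/code] are ignored.
+				This value influences the ratio between input and output buffer sizes during time-stretching. A value of [code]1.0[/code] preserves the original tempo, values greater than [code]1.0[/code] speed up playback, and values less than [code]1.0[/code] slow it down.
+			</description>
+		</method>
+	</methods>
+</class>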
diff --git a/modules/signalsmith/SCsub b/modules/signalsmith/SCsub
new file mode 100644
index 0000000000..3de9dedca8
--- /dev/null
+++ b/modules/signalsmith/SCsub
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+from misc.utility.scons_hints import *
+
+Import("env")
+
+signalsmith_env = env.Clone()
+
+signalsmith_env.Append(CPPPATH=["#thirdparty"])
+
+cxx = env.subst("$CXX")
+is_clang = "clang" in cxx
+is_gcc = ("gcc" in cxx or "g++" in cxx) and not is_clang
+is_msvc = ("cl" in cxx) and not is_clang
+
+if is_gcc:
+ signalsmith_env.Append(CCFLAGS=["-Wno-class-memaccess"])
+ signalsmith_env.Append(CCFLAGS=["-Wno-shadow"])
+
+# silence "hides previous declaration/member" warnings
+if is_msvc:
+ signalsmith_env.Append(
+ CCFLAGS=[
+ "/wd4456", # hides previous local declaration
+ "/wd4458", # hides class member
+ ]
+ )
+
+module_sources = [
+ "register_types.cpp",
+ "signalsmith_module.cpp",
+]
+
+signalsmith_env.add_source_files(env.modules_sources, module_sources)
diff --git a/modules/signalsmith/config.py b/modules/signalsmith/config.py
new file mode 100644
index 0000000000..d22f9454ed
--- /dev/null
+++ b/modules/signalsmith/config.py
@@ -0,0 +1,6 @@
+def can_build(env, platform):  # Build hook: reports the module as buildable for every env/platform pair.
+ return True
+
+
+def configure(env):  # Build hook: intentionally a no-op, `env` is left untouched.
+ pass
diff --git a/modules/signalsmith/register_types.cpp b/modules/signalsmith/register_types.cpp
new file mode 100644
index 0000000000..a28b031257
--- /dev/null
+++ b/modules/signalsmith/register_types.cpp
@@ -0,0 +1,49 @@
+/**************************************************************************/
+/* register_types.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* REDOT ENGINE */
+/* https://redotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2024-present Redot Engine contributors */
+/* (see REDOT_AUTHORS.md) */
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "register_types.h"
+#include "core/object/class_db.h"
+#include "signalsmith_module.h"
+
+void initialize_signalsmith_module(ModuleInitializationLevel p_level) {
+	// Called once per initialization level; the class is registered at the scene level only.
+	if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) {
+		return;
+	}
+	ClassDB::register_class<SignalSmith>();
+}
+
+void uninitialize_signalsmith_module(ModuleInitializationLevel p_level) {
+	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
+		// Nothing to tear down: this function releases no resources at any level.
+	}
+}
diff --git a/modules/signalsmith/register_types.h b/modules/signalsmith/register_types.h
new file mode 100644
index 0000000000..5ead1a2a91
--- /dev/null
+++ b/modules/signalsmith/register_types.h
@@ -0,0 +1,38 @@
+/**************************************************************************/
+/* register_types.h */
+/**************************************************************************/
+/* This file is part of: */
+/* REDOT ENGINE */
+/* https://redotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2024-present Redot Engine contributors */
+/* (see REDOT_AUTHORS.md) */
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#pragma once
+
+#include "modules/register_module_types.h"
+
+void initialize_signalsmith_module(ModuleInitializationLevel p_level); // Module start-up hook, invoked with the initialization level being entered.
+void uninitialize_signalsmith_module(ModuleInitializationLevel p_level); // Module shutdown hook, invoked with the initialization level being left.
diff --git a/modules/signalsmith/signalsmith_module.cpp b/modules/signalsmith/signalsmith_module.cpp
new file mode 100644
index 0000000000..3ead1acbe6
--- /dev/null
+++ b/modules/signalsmith/signalsmith_module.cpp
@@ -0,0 +1,173 @@
+/**************************************************************************/
+/* signalsmith_module.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* REDOT ENGINE */
+/* https://redotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2024-present Redot Engine contributors */
+/* (see REDOT_AUTHORS.md) */
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "signalsmith_module.h"
+
+#include "core/os/memory.h"
+
+#include <cmath>
+#include <vector>
+
+void SignalSmith::_bind_methods() { // Registers each public method with ClassDB, together with its argument names.
+ ClassDB::bind_method(D_METHOD("set_sample_rate", "rate"), &SignalSmith::set_sample_rate);
+ ClassDB::bind_method(D_METHOD("set_channels", "channels"), &SignalSmith::set_channels);
+ ClassDB::bind_method(D_METHOD("set_pitch", "pitch"), &SignalSmith::set_pitch);
+ ClassDB::bind_method(D_METHOD("set_tempo", "tempo"), &SignalSmith::set_tempo);
+ ClassDB::bind_method(D_METHOD("reset"), &SignalSmith::reset);
+ ClassDB::bind_method(D_METHOD("process", "input"), &SignalSmith::process); // takes and returns interleaved PackedFloat32Array data
+}
+
+SignalSmith::SignalSmith() { // Configures the stretcher from the in-class defaults of `channels` and `sample_rate`.
+ stretch.presetDefault(channels, sample_rate);
+}
+
+SignalSmith::~SignalSmith() {} // Empty body: members are destroyed by their own destructors.
+
+void SignalSmith::set_sample_rate(int p_rate) {
+	// Rates below 1 Hz are ignored and the current configuration is kept.
+	if (p_rate >= 1) {
+		sample_rate = p_rate;
+		// Re-apply the default preset so the stretcher picks up the new rate.
+		stretch.presetDefault(channels, sample_rate);
+	}
+}
+
+void SignalSmith::set_channels(int p_channels) {
+	// Channel counts below 1 are ignored and the current configuration is kept.
+	if (p_channels >= 1) {
+		channels = p_channels;
+		// Re-apply the default preset so the stretcher picks up the new layout.
+		stretch.presetDefault(channels, sample_rate);
+	}
+}
+
+void SignalSmith::set_pitch(float p_pitch) {
+	// Only strictly positive factors are forwarded; zero, negative values and
+	// NaN all fail this comparison and are ignored.
+	if (p_pitch > 0.0f) {
+		stretch.setTransposeFactor(p_pitch);
+	}
+}
+
+void SignalSmith::set_tempo(float p_tempo) {
+	// Only strictly positive multipliers are stored; zero, negative values and
+	// NaN all fail this comparison and are ignored.
+	if (p_tempo > 0.0f) {
+		tempo = p_tempo;
+	}
+}
+
+void SignalSmith::reset() { // Forwards to the stretcher's reset(); sample_rate, channels and tempo are not modified here.
+ stretch.reset();
+}
+
+PackedFloat32Array SignalSmith::process(const PackedFloat32Array &input) {
+	// Time-stretches one block of interleaved samples and returns the interleaved
+	// result, which holds about input_frames / tempo frames. An empty array is
+	// returned for empty input; a misaligned or oversized request also returns an
+	// empty array after reporting an error.
+	PackedFloat32Array output;
+
+	const int total_samples = input.size();
+	if (channels < 1 || total_samples <= 0) {
+		// Nothing to process.
+		return output;
+	}
+
+	ERR_FAIL_COND_V_MSG(total_samples % channels != 0, output, "Input array size must be a multiple of channel count.");
+
+	// At least one whole frame is present: the input is non-empty and frame-aligned.
+	const int input_frames = total_samples / channels;
+
+	// The tempo sets the length ratio: a tempo of 2.0 produces half as many frames.
+	// set_tempo() already rejects non-positive values; the fallback is defensive.
+	const double tf = (tempo > 0.0f) ? (double)tempo : 1.0;
+	const double requested_frames = (double)input_frames / tf;
+
+	// A very small tempo could request more frames than an int can index once
+	// interleaved. Fail cleanly instead of overflowing the conversions below.
+	const double max_output_frames = (double)(2147483647 / channels);
+	ERR_FAIL_COND_V_MSG(!(requested_frames <= max_output_frames), output, "Tempo is too low for this input size: the stretched output would be too large.");
+
+	const int output_frames = (int)std::lround(requested_frames);
+
+	// Deinterleave: one contiguous buffer per channel is handed to the stretcher.
+	std::vector<std::vector<float>> in_ch((size_t)channels);
+
+	for (int c = 0; c < channels; c++) {
+		in_ch[(size_t)c].resize((size_t)input_frames);
+	}
+
+	const float *src = input.ptr();
+
+	for (int i = 0; i < input_frames; i++) {
+		const int base = i * channels;
+
+		for (int c = 0; c < channels; c++) {
+			in_ch[(size_t)c][(size_t)i] = src[base + c];
+		}
+	}
+
+	// Zero-filled output buffers, one per channel.
+	std::vector<std::vector<float>> out_ch((size_t)channels);
+
+	for (int c = 0; c < channels; c++) {
+		out_ch[(size_t)c].assign((size_t)output_frames, 0.0f);
+	}
+
+	// Per-channel pointer tables, indexed as [channel][frame].
+	std::vector<const float *> in_ptrs((size_t)channels, nullptr);
+	std::vector<float *> out_ptrs((size_t)channels, nullptr);
+
+	for (int c = 0; c < channels; c++) {
+		in_ptrs[(size_t)c] = in_ch[(size_t)c].data();
+		out_ptrs[(size_t)c] = out_ch[(size_t)c].data();
+	}
+
+	// Process: (inputs, inputSamples, outputs, outputSamples). This still runs
+	// when output_frames is 0, so the input is passed to the stretcher regardless.
+	stretch.process(in_ptrs.data(), input_frames, out_ptrs.data(), output_frames);
+
+	// Interleave the per-channel results into the returned array.
+	output.resize(output_frames * channels);
+	float *dst = output.ptrw();
+
+	for (int i = 0; i < output_frames; i++) {
+		const int base = i * channels;
+
+		for (int c = 0; c < channels; c++) {
+			dst[base + c] = out_ch[(size_t)c][(size_t)i];
+		}
+	}
+
+	return output;
+}
diff --git a/modules/signalsmith/signalsmith_module.h b/modules/signalsmith/signalsmith_module.h
new file mode 100644
index 0000000000..e60453dee8
--- /dev/null
+++ b/modules/signalsmith/signalsmith_module.h
@@ -0,0 +1,63 @@
+/**************************************************************************/
+/* signalsmith_module.h */
+/**************************************************************************/
+/* This file is part of: */
+/* REDOT ENGINE */
+/* https://redotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2024-present Redot Engine contributors */
+/* (see REDOT_AUTHORS.md) */
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#pragma once
+
+#include "core/object/class_db.h"
+#include "core/object/ref_counted.h"
+#include "signalsmith-stretch/signalsmith-stretch.h"
+#include
+
+class SignalSmith : public RefCounted { // Wraps a Signalsmith stretcher behind an interleaved PackedFloat32Array interface.
+ GDCLASS(SignalSmith, RefCounted);
+
+private:
+ signalsmith::stretch::SignalsmithStretch stretch; // Underlying stretcher; (re)configured through presetDefault() in the .cpp.
+ int sample_rate = 44100; // Hz; value used until set_sample_rate() accepts another one.
+ int channels = 2; // Interleaved channel count; value used until set_channels() accepts another one.
+ float tempo = 1.0f; // Tempo multiplier; process() sizes its output as round(input_frames / tempo).
+
+protected:
+ static void _bind_methods(); // Registers the public methods below with ClassDB.
+
+public:
+ SignalSmith(); // Applies the default preset for the default channels/sample_rate.
+ ~SignalSmith();
+
+ void set_sample_rate(int p_rate); // Ignored when p_rate < 1.
+ void set_channels(int p_channels); // Ignored when p_channels < 1.
+ void set_pitch(float p_pitch); // Transpose factor; ignored unless p_pitch > 0.
+ void set_tempo(float p_tempo); // Ignored unless p_tempo > 0.
+ void reset(); // Resets the stretcher's processing state.
+
+ PackedFloat32Array process(const PackedFloat32Array &input); // Interleaved in, interleaved out; input size must be a multiple of `channels`.
+};
diff --git a/thirdparty/signalsmith-linear/.gitignore b/thirdparty/signalsmith-linear/.gitignore
new file mode 100644
index 0000000000..ee8cad017e
--- /dev/null
+++ b/thirdparty/signalsmith-linear/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+tests/others/dsp
+out/
diff --git a/thirdparty/signalsmith-linear/LICENSE.txt b/thirdparty/signalsmith-linear/LICENSE.txt
new file mode 100644
index 0000000000..c2569820cd
--- /dev/null
+++ b/thirdparty/signalsmith-linear/LICENSE.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Signalsmith Audio
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/thirdparty/signalsmith-linear/README.md b/thirdparty/signalsmith-linear/README.md
new file mode 100644
index 0000000000..177b7c0227
--- /dev/null
+++ b/thirdparty/signalsmith-linear/README.md
@@ -0,0 +1,99 @@
+# Signalsmith Linear
+
+Header-only C++11 wrappers for common things needed by our audio libraries. It's designed for internal use, so [caveat developor](https://en.wikipedia.org/wiki/Caveat_emptor). The goal is to wrap around Accelerate/IPP when available, but still work without it.
+
+Everything is in `signalsmith::linear::` namespace.
+
+## FFTs
+
+```cpp
+#include "signalsmith-linear/fft.h"
+```
+
+This provides real and complex FFTs. They all have `.resize(size_t)`, and `.fft()`/`.ifft()` methods which can take either `std::complex<>` pointers, or a real/imaginary pair ("split-complex" form) for each complex argument.
+
+The `Pow2FFT<>` and `Pow2RealFFT<>` templates wrap around fast implementations where available.
+
+The main `FFT<>`, `RealFFT<>` and `ModifiedRealFFT<>` templates wrap around the `Pow2<>` implementations, to add support for multiples of 3 and 5. They provide a static `.fastSizeAbove()` to find the next biggest size.
+
+### Chunked computation
+
+The main FFT classes also provide a `.steps()` method, and an optional `size_t` first argument to the `.fft()`/`.ifft()` methods, so that computation can be divided up into chunks.
+
+The computation time for the chunks is not exactly equal, but when you're doing large FFTs periodically (instead of smaller ones regularly) it can help distribute the computation out, without using threads.
+
+## STFTs
+
+```cpp
+#include "signalsmith-linear/stft.h"
+```
+
+This provides the `DynamicSTFT<>` template, which can be configured for any block length (zero-padding to a fast size), and a (default!) interval between blocks. It can have a different number of input/output channels, using `.spectrum(c)` to access both.
+
+### Input and output
+
+The `.synthesise()` method moves the output time forward, and adds in output block(s) generated from the spectrum. The result can then be queried using `.readOutput()`.
+
+Input is passed in using `.writeInput()`, but you must move the input time forward using `.moveInput()` before calling `.analyse()`. By default, this will analyse the most recent block of input, but if you passed in a non-zero `extraInputHistory` when configuring, you can analyse input from some time in the past.
+
+### Window shape and block interval
+
+It has separate analysis/synthesis windows, which can be changed on-the-fly. Both windows have an "offset" marking the centre of the window, to support asymmetrical setups (for reduced latency).
+
+The "synthesis interval" (optionally passed to `.synthesise()`) can also change arbitrarily. The output is normalised according to these gaps (and the analysis/synthesis windows) such that regardless of spacing or window shape, it would perfectly reconstruct the input.
+
+The analysis window should remain constant between analysis and synthesis, since it's used for normalising the output properly. If you're synthesising signals from scratch (which have no inherent input windowing), then you should reflect that by setting the analysis window to all 1s.
+
+### Chunked computation
+
+Similar to the FFTs' `.steps()` methods, `DynamicSTFT` has `.analyseSteps()`/`.synthesiseSteps()` methods, and `.analyseStep()`/`.synthesiseStep()` to help you spread the computation out over time.
+
+The window shapes/offsets, input/output and synthesis interval must stay the same until the analysis/synthesis is finished.
+
+## Expressions
+
+```cpp
+#include "signalsmith-linear/linear.h"
+```
+
+The main `Linear` class provides expression templates, which wrap around three types of pointer: real, complex, and split-complex. You can wrap these pointers (or `std::vector`s) into `Expression`s using the `()` operator:
+
+```cpp
+Linear linear;
+
+float *a, *b;
+std::complex<float> *c;
+size_t size;
+
+// Once the above variables are set up:
+linear(a, size) = linear(b) + linear(c).abs();
+
+// Pass in two real pointers to make a split-complex expression (which is often a bit faster).
+linear(c, size) = linear(a, b).conj();
+```
+
+Implementations may use temporary internal storage. This means it's not thread-safe, it should be a member of your processing class, and you should also call `.reserve???()` during configuration, with the longest vector length you expect to use.
+
+## Building
+
+### CMake
+
+If you're using CMake, include this directory. It will add a `signalsmith-linear` target which doesn't build anything, but linking to this "library" will add the include path.
+
+By default, it will link to Accelerate on Mac. If you don't want this, set the CMake option `SIGNALSMITH_USE_ACCELERATE` to `OFF`.
+
+The similar option `SIGNALSMITH_USE_IPP` is `OFF` by default. When enabled, it will link to IPP and set the `SIGNALSMITH_USE_IPP` preprocessor definition.
+
+`SIGNALSMITH_USE_PFFFT` and `SIGNALSMITH_USE_PFFFT_DOUBLE` don't link to anything, because there are multiple versions.
+
+### Other
+
+To use Accelerate on Mac, link the framework and define `SIGNALSMITH_USE_ACCELERATE`:
+
+```
+g++ -framework Accelerate -DSIGNALSMITH_USE_ACCELERATE
+```
+
+Similarly, define `SIGNALSMITH_USE_IPP` (and link to `IPP::ippcore` and `IPP::ipps`) for IPP.
+
+Not all PFFFT versions support double-precision, so there are separate `SIGNALSMITH_USE_PFFFT`/`SIGNALSMITH_USE_PFFFT_DOUBLE` flags.
diff --git a/thirdparty/signalsmith-linear/SUPPORT.txt b/thirdparty/signalsmith-linear/SUPPORT.txt
new file mode 100644
index 0000000000..96e02c2956
--- /dev/null
+++ b/thirdparty/signalsmith-linear/SUPPORT.txt
@@ -0,0 +1,10 @@
+# https://github.com/geraintluff/SUPPORT.txt
+# These people have committed to maintain the project until at least these dates
+
+2027-01-01 Geraint Luff
+
+FFTs and STFTs:
+2027-01-01 Geraint Luff
+
+Linear:
+This is too experimental, you're on your own.
diff --git a/thirdparty/signalsmith-linear/fft.h b/thirdparty/signalsmith-linear/fft.h
new file mode 100644
index 0000000000..80b559bab8
--- /dev/null
+++ b/thirdparty/signalsmith-linear/fft.h
@@ -0,0 +1,1380 @@
+#ifndef SIGNALSMITH_AUDIO_LINEAR_FFT_H
+#define SIGNALSMITH_AUDIO_LINEAR_FFT_H
+
+#include
+#include
+#include
+
+#if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099) && !defined(SIGNALSMITH_IGNORE_BROKEN_APPLECLANG)
+# error Apple Clang 16.0.0 generates incorrect SIMD for ARM. If you HAVE to use this version of Clang, turn off -ffast-math.
+#endif
+
+#ifndef M_PI
+# define M_PI 3.14159265358979323846
+#endif
+
+namespace signalsmith { namespace linear {
+
+namespace _impl {
+ // Helpers for complex arithmetic, ignoring the NaN/Inf edge-cases you get without `-ffast-math`
+ template
+ void complexMul(std::complex *a, const std::complex *b, const std::complex *c, size_t size) { // Element-wise a[i] = b[i] * c[i] for i in [0, size).
+ for (size_t i = 0; i < size; ++i) {
+ auto bi = b[i], ci = c[i]; // both operands are copied before a[i] is written
+ a[i] = {bi.real()*ci.real() - bi.imag()*ci.imag(), bi.imag()*ci.real() + bi.real()*ci.imag()}; // (br*cr - bi*ci) + j(bi*cr + br*ci)
+ }
+ }
+ template
+ void complexMulConj(std::complex *a, const std::complex *b, const std::complex *c, size_t size) { // Element-wise a[i] = b[i] * conj(c[i]) for i in [0, size).
+ for (size_t i = 0; i < size; ++i) {
+ auto bi = b[i], ci = c[i]; // both operands are copied before a[i] is written
+ a[i] = {bi.real()*ci.real() + bi.imag()*ci.imag(), bi.imag()*ci.real() - bi.real()*ci.imag()}; // (br*cr + bi*ci) + j(bi*cr - br*ci)
+ }
+ }
+ template
+ void complexMul(V *ar, V *ai, const V *br, const V *bi, const V *cr, const V *ci, size_t size) { // Split-complex product: (ar + j*ai)[i] = (br + j*bi)[i] * (cr + j*ci)[i].
+ for (size_t i = 0; i < size; ++i) {
+ V rr = br[i]*cr[i] - bi[i]*ci[i]; // real part
+ V ri = br[i]*ci[i] + bi[i]*cr[i]; // imaginary part
+ ar[i] = rr; // stores happen only after both parts have been computed
+ ai[i] = ri;
+ }
+ }
+ template
+ void complexMulConj(V *ar, V *ai, const V *br, const V *bi, const V *cr, const V *ci, size_t size) { // Split-complex product with conjugate: (ar + j*ai)[i] = (br + j*bi)[i] * conj((cr + j*ci)[i]).
+ for (size_t i = 0; i < size; ++i) {
+ V rr = cr[i]*br[i] + ci[i]*bi[i]; // real part
+ V ri = cr[i]*bi[i] - ci[i]*br[i]; // imaginary part
+ ar[i] = rr; // stores happen only after both parts have been computed
+ ai[i] = ri;
+ }
+ }
+
+ // Input: bStride groups of aStride adjacent elements -> output transposed, so that b[ai*bStride + bi] = a[bi*aStride + ai]
+ template
+ void interleaveCopy(const V *a, V *b, size_t bStride) { // NOTE(review): aStride is not a function parameter here - presumably a compile-time template argument; confirm against upstream.
+ for (size_t bi = 0; bi < bStride; ++bi) {
+ const V *offsetA = a + bi*aStride; // start of input group bi
+ V *offsetB = b + bi; // column bi of the output
+ for (size_t ai = 0; ai < aStride; ++ai) {
+ offsetB[ai*bStride] = offsetA[ai];
+ }
+ }
+ }
+ template
+ void interleaveCopy(const V *a, V *b, size_t aStride, size_t bStride) { // Same copy with aStride passed as an argument: b[ai*bStride + bi] = a[bi*aStride + ai].
+ for (size_t bi = 0; bi < bStride; ++bi) {
+ const V *offsetA = a + bi*aStride; // start of input group bi
+ V *offsetB = b + bi; // column bi of the output
+ for (size_t ai = 0; ai < aStride; ++ai) {
+ offsetB[ai*bStride] = offsetA[ai];
+ }
+ }
+ }
+ template
+ void interleaveCopy(const V *aReal, const V *aImag, V *bReal, V *bImag, size_t bStride) { // Split-complex form: applies the same index mapping to the real and imaginary arrays. NOTE(review): aStride is presumably a template argument; confirm.
+ for (size_t bi = 0; bi < bStride; ++bi) {
+ const V *offsetAr = aReal + bi*aStride; // start of input group bi (real)
+ const V *offsetAi = aImag + bi*aStride; // start of input group bi (imaginary)
+ V *offsetBr = bReal + bi;
+ V *offsetBi = bImag + bi;
+ for (size_t ai = 0; ai < aStride; ++ai) {
+ offsetBr[ai*bStride] = offsetAr[ai];
+ offsetBi[ai*bStride] = offsetAi[ai];
+ }
+ }
+ }
+ template
+ void interleaveCopy(const V *aReal, const V *aImag, V *bReal, V *bImag, size_t aStride, size_t bStride) { // Split-complex form with aStride passed as an argument: bX[ai*bStride + bi] = aX[bi*aStride + ai] for both arrays.
+ for (size_t bi = 0; bi < bStride; ++bi) {
+ const V *offsetAr = aReal + bi*aStride; // start of input group bi (real)
+ const V *offsetAi = aImag + bi*aStride; // start of input group bi (imaginary)
+ V *offsetBr = bReal + bi;
+ V *offsetBi = bImag + bi;
+ for (size_t ai = 0; ai < aStride; ++ai) {
+ offsetBr[ai*bStride] = offsetAr[ai];
+ offsetBi[ai*bStride] = offsetAi[ai];
+ }
+ }
+ }
+}
+
+/// Fairly simple and very portable power-of-2 FFT, built from recursive radix-4 passes with a 2-point fallback (see fftPass()/combine4())
+template
+struct SimpleFFT {
+ using Complex = std::complex;
+
+ SimpleFFT(size_t size=0) {
+ resize(size);
+ }
+
+ void resize(size_t size) { // Rebuilds the twiddle table and the scratch buffer for `size`-point transforms.
+ twiddles.resize(size*3/4); // combine4() reads indices up to 3*i*twiddleStep, which stays below 3/4 of the size
+ for (size_t i = 0; i < size*3/4; ++i) {
+ Sample twiddlePhase = -2*M_PI*i/size; // phase of exp(-2*pi*j*i/size)
+ twiddles[i] = std::polar(Sample(1), twiddlePhase);
+ }
+ working.resize(size); // working.size() doubles as the configured transform size
+ }
+
+ void fft(const Complex *time, Complex *freq) { // Interleaved-complex transform from `time` into `freq`.
+ size_t size = working.size();
+ if (size <= 1) { // sizes 0 and 1: one element is copied through unchanged
+ *freq = *time;
+ return;
+ }
+ fftPass(size, 1, time, freq, working.data()); // NOTE(review): textually identical to the call in ifft(); the forward/inverse selection is presumably a template argument missing from this copy - confirm against upstream.
+ }
+
+ void ifft(const Complex *freq, Complex *time) { // Interleaved-complex transform from `freq` into `time`; no scaling is applied in the code shown.
+ size_t size = working.size();
+ if (size <= 1) { // sizes 0 and 1: one element is copied through unchanged
+ *time = *freq;
+ return;
+ }
+ fftPass(size, 1, freq, time, working.data());
+ }
+
+ void fft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // Split-complex variant: separate real and imaginary arrays.
+ size_t size = working.size();
+ if (size <= 1) {
+ *outR = *inR;
+ *outI = *inI;
+ return;
+ }
+ Sample *workingR = (Sample *)working.data(), *workingI = workingR + size; // the complex scratch buffer is reused as two consecutive real arrays of `size` samples
+ fftPass(size, 1, inR, inI, outR, outI, workingR, workingI);
+ }
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // Split-complex counterpart of ifft() above.
+ size_t size = working.size();
+ if (size <= 1) {
+ *outR = *inR;
+ *outI = *inI;
+ return;
+ }
+ Sample *workingR = (Sample *)working.data(), *workingI = workingR + size; // same scratch-buffer reuse as in the split fft()
+ fftPass(size, 1, inR, inI, outR, outI, workingR, workingI);
+ }
+private:
+ std::vector twiddles; // unit-magnitude factors filled in by resize()
+ std::vector working; // scratch space; its length is the transform size
+
+ template
+ static Complex mul(const Complex &a, const Complex &b) { // Returns a*conj(b) when conjB is set, otherwise a*b.
+ return conjB ? Complex{
+ a.real()*b.real() + a.imag()*b.imag(),
+ a.imag()*b.real() - a.real()*b.imag()
+ } : Complex{
+ a.real()*b.real() - a.imag()*b.imag(),
+ a.imag()*b.real() + a.real()*b.imag()
+ };
+ }
+
+ // Calculate a [size]-point FFT, where each element is a block of [stride] values
+ template
+ void fftPass(size_t size, size_t stride, const Complex *input, Complex *output, Complex *working) {
+ if (size/4 > 1) {
+ // Calculate four quarter-size FFTs (output and working swap roles for the recursive call)
+ fftPass(size/4, stride*4, input, working, output);
+ combine4(size, stride, working, output);
+ } else if (size == 4) {
+ combine4(4, stride, input, output); // a single radix-4 stage straight from the input
+ } else {
+ // 2-point FFT
+ for (size_t s = 0; s < stride; ++s) {
+ Complex a = input[s];
+ Complex b = input[s + stride];
+ output[s] = a + b;
+ output[s + stride] = a - b;
+ }
+ }
+ }
+
+ // Combine interleaved results into a single spectrum
+ template
+ void combine4(size_t size, size_t stride, const Complex *input, Complex *output) const {
+ auto twiddleStep = working.size()/size; // step through the full-size twiddle table for this sub-transform size
+ for (size_t i = 0; i < size/4; ++i) {
+ Complex twiddleB = twiddles[i*twiddleStep];
+ Complex twiddleC = twiddles[i*2*twiddleStep];
+ Complex twiddleD = twiddles[i*3*twiddleStep];
+
+ const Complex *inputA = input + 4*i*stride;
+ const Complex *inputB = input + (4*i + 1)*stride;
+ const Complex *inputC = input + (4*i + 2)*stride;
+ const Complex *inputD = input + (4*i + 3)*stride;
+ Complex *outputA = output + i*stride;
+ Complex *outputB = output + (i + size/4)*stride;
+ Complex *outputC = output + (i + size/4*2)*stride;
+ Complex *outputD = output + (i + size/4*3)*stride;
+ for (size_t s = 0; s < stride; ++s) {
+ Complex a = inputA[s];
+ Complex b = mul(inputB[s], twiddleB);
+ Complex c = mul(inputC[s], twiddleC);
+ Complex d = mul(inputD[s], twiddleD);
+ Complex ac0 = a + c, ac1 = a - c;
+ Complex bd0 = b + d, bd1 = inverse ? (b - d) : (d - b); // sign of the odd term depends on the transform direction
+ Complex bd1i = {-bd1.imag(), bd1.real()}; // bd1 multiplied by j
+ outputA[s] = ac0 + bd0;
+ outputB[s] = ac1 + bd1i;
+ outputC[s] = ac0 - bd0;
+ outputD[s] = ac1 - bd1i;
+ }
+ }
+ }
+
+ // The same thing, but translated for split-complex input/output
+ template
+ void fftPass(size_t size, size_t stride, const Sample *inputR, const Sample *inputI, Sample *outputR, Sample *outputI, Sample *workingR, Sample *workingI) const {
+ if (size/4 > 1) {
+ // Calculate four quarter-size FFTs (output and working swap roles for the recursive call)
+ fftPass(size/4, stride*4, inputR, inputI, workingR, workingI, outputR, outputI);
+ combine4(size, stride, workingR, workingI, outputR, outputI);
+ } else if (size == 4) {
+ combine4(4, stride, inputR, inputI, outputR, outputI); // a single radix-4 stage straight from the input
+ } else {
+ // 2-point FFT
+ for (size_t s = 0; s < stride; ++s) {
+ Sample ar = inputR[s], ai = inputI[s];
+ Sample br = inputR[s + stride], bi = inputI[s + stride];
+ outputR[s] = ar + br;
+ outputI[s] = ai + bi;
+ outputR[s + stride] = ar - br;
+ outputI[s + stride] = ai - bi;
+ }
+ }
+ }
+
+ // Combine interleaved results into a single spectrum
+ template
+ void combine4(size_t size, size_t stride, const Sample *inputR, const Sample *inputI, Sample *outputR, Sample *outputI) const {
+ auto twiddleStep = working.size()/size; // step through the full-size twiddle table for this sub-transform size
+ for (size_t i = 0; i < size/4; ++i) {
+ Complex twiddleB = twiddles[i*twiddleStep];
+ Complex twiddleC = twiddles[i*2*twiddleStep];
+ Complex twiddleD = twiddles[i*3*twiddleStep];
+
+ const Sample *inputAr = inputR + 4*i*stride, *inputAi = inputI + 4*i*stride;
+ const Sample *inputBr = inputR + (4*i + 1)*stride, *inputBi = inputI + (4*i + 1)*stride;
+ const Sample *inputCr = inputR + (4*i + 2)*stride, *inputCi = inputI + (4*i + 2)*stride;
+ const Sample *inputDr = inputR + (4*i + 3)*stride, *inputDi = inputI + (4*i + 3)*stride;
+ Sample *outputAr = outputR + i*stride, *outputAi = outputI + i*stride;
+ Sample *outputBr = outputR + (i + size/4)*stride, *outputBi = outputI + (i + size/4)*stride;
+ Sample *outputCr = outputR + (i + size/4*2)*stride, *outputCi = outputI + (i + size/4*2)*stride;
+ Sample *outputDr = outputR + (i + size/4*3)*stride, *outputDi = outputI + (i + size/4*3)*stride;
+ for (size_t s = 0; s < stride; ++s) {
+ Complex a = {inputAr[s], inputAi[s]}; // the split parts are packed into Complex values for the butterfly arithmetic
+ Complex b = mul({inputBr[s], inputBi[s]}, twiddleB);
+ Complex c = mul({inputCr[s], inputCi[s]}, twiddleC);
+ Complex d = mul({inputDr[s], inputDi[s]}, twiddleD);
+ Complex ac0 = a + c, ac1 = a - c;
+ Complex bd0 = b + d, bd1 = inverse ? (b - d) : (d - b); // sign of the odd term depends on the transform direction
+ Complex bd1i = {-bd1.imag(), bd1.real()}; // bd1 multiplied by j
+ outputAr[s] = ac0.real() + bd0.real();
+ outputAi[s] = ac0.imag() + bd0.imag();
+ outputBr[s] = ac1.real() + bd1i.real();
+ outputBi[s] = ac1.imag() + bd1i.imag();
+ outputCr[s] = ac0.real() - bd0.real();
+ outputCi[s] = ac0.imag() - bd0.imag();
+ outputDr[s] = ac1.real() - bd1i.real();
+ outputDi[s] = ac1.imag() - bd1i.imag();
+ }
+ }
+ }
+};
+
+/// A power-of-2 only FFT, specialised with platform-specific fast implementations where available. This generic version forwards every transform to a SimpleFFT.
+template
+struct Pow2FFT {
+ static constexpr bool prefersSplit = true; // whether this FFT implementation is faster when given split-complex inputs
+ using Complex = std::complex;
+
+ Pow2FFT(size_t size=0) { // sizes the wrapped FFT immediately; 0 is the default
+ resize(size);
+ }
+ // Allow move, but not copy
+ Pow2FFT(const Pow2FFT &other) = delete;
+ Pow2FFT(Pow2FFT &&other) : tmp(std::move(other.tmp)), simpleFFT(std::move(other.simpleFFT)) {}
+
+ void resize(size_t size) { // resizes both the wrapped FFT and the scratch vector to `size`
+ simpleFFT.resize(size);
+ tmp.resize(size);
+ }
+
+ void fft(const Complex *time, Complex *freq) { // forward transform, interleaved complex data
+ simpleFFT.fft(time, freq);
+ }
+ void fft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // forward transform, split real/imaginary arrays
+ simpleFFT.fft(inR, inI, outR, outI);
+ }
+
+ void ifft(const Complex *freq, Complex *time) { // inverse transform, interleaved complex data
+ simpleFFT.ifft(freq, time);
+ }
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // inverse transform, split real/imaginary arrays
+ simpleFFT.ifft(inR, inI, outR, outI);
+ }
+
+private:
+ std::vector tmp; // resized alongside the FFT but not read by any method of this generic version
+ SimpleFFT simpleFFT;
+};
+
+/// An FFT which can handle multiples of 3 and 5, and can be computed in chunks
+template
+struct SplitFFT {
+ using Complex = std::complex;
+ static constexpr bool prefersSplit = Pow2FFT::prefersSplit;
+
+ static constexpr size_t maxSplit = splitComputation ? 4 : 1;
+ static constexpr size_t minInnerSize = 32;
+
+ /// Rounds `size` up to a length of the form (power of 2) x multiple, where the multiple is at most 8 and never 7.
+ /// The result is always >= `size`.
+ static size_t fastSizeAbove(size_t size) {
+ size_t base = 1;
+ // Small requests: grow the power-of-2 base until it covers the size or reaches 16
+ for (; base < 16 && base < size; base <<= 1) {}
+ // Large requests: keep doubling until eight bases are enough to cover the size
+ for (; base*8 < size; base <<= 1) {}
+ // Number of whole bases needed, rounding up
+ const size_t count = (size + (base - 1))/base;
+ // A factor of 7 is not used, so it is bumped up to 8
+ return (count == 7 ? count + 1 : count)*base;
+ }
+
+ SplitFFT(size_t size=0) { // plans an FFT of `size` points; 0 leaves the plan empty
+ resize(size);
+ }
+
+ void resize(size_t size) { // factorises size into innerSize (a power of 2) times outerSize, then rebuilds the twiddle tables and the step plan
+ innerSize = 1;
+ outerSize = size;
+
+ dftTmp.resize(0);
+ dftTwists.resize(0);
+ plan.resize(0);
+ if (!size) return; // empty FFT: nothing to plan, and size() reports 1*0 == 0
+
+ // Inner size = largest power of 2 such that either the inner size >= minInnerSize, or we have the target number of splits
+ while (!(outerSize&1) && (outerSize > maxSplit || innerSize < minInnerSize)) {
+ innerSize *= 2;
+ outerSize /= 2;
+ }
+ tmpFreq.resize(size);
+ innerFFT.resize(innerSize);
+
+ outerTwiddles.resize(innerSize*(outerSize - 1)); // one row of innerSize twiddles per s = 1..outerSize-1; no row is stored for s == 0
+ outerTwiddlesR.resize(innerSize*(outerSize - 1));
+ outerTwiddlesI.resize(innerSize*(outerSize - 1));
+ for (size_t i = 0; i < innerSize; ++i) {
+ for (size_t s = 1; s < outerSize; ++s) {
+ Sample twiddlePhase = Sample(-2*M_PI*i/innerSize*s/outerSize); // phase -2*pi*i*s/(innerSize*outerSize)
+ outerTwiddles[i + (s - 1)*innerSize] = std::polar(Sample(1), twiddlePhase);
+ }
+ }
+ for (size_t i = 0; i < outerTwiddles.size(); ++i) { // duplicate the twiddles as separate real/imaginary arrays for the split-complex path
+ outerTwiddlesR[i] = outerTwiddles[i].real();
+ outerTwiddlesI[i] = outerTwiddles[i].imag();
+ }
+
+ StepType interleaveStep = StepType::interleaveOrderN; // generic steps by default, replaced below by the fixed-size versions for outer sizes 2-5
+ StepType finalStep = StepType::finalOrderN;
+ if (outerSize == 2) {
+ interleaveStep = StepType::interleaveOrder2;
+ finalStep = StepType::finalOrder2;
+ }
+ if (outerSize == 3) {
+ interleaveStep = StepType::interleaveOrder3;
+ finalStep = StepType::finalOrder3;
+ }
+ if (outerSize == 4) {
+ interleaveStep = StepType::interleaveOrder4;
+ finalStep = StepType::finalOrder4;
+ }
+ if (outerSize == 5) {
+ interleaveStep = StepType::interleaveOrder5;
+ finalStep = StepType::finalOrder5;
+ }
+
+ if (outerSize <= 1) { // pure power of 2: the whole transform is one inner FFT
+ if (size > 0) plan.push_back(Step{StepType::passthrough, 0}); // size is always non-zero here because of the early return above
+ } else {
+ plan.push_back({interleaveStep, 0}); // plan order: interleave copy, outerSize inner FFTs, twiddle multiply, final outer pass
+ plan.push_back({StepType::firstFFT, 0});
+ for (size_t s = 1; s < outerSize; ++s) {
+ plan.push_back({StepType::middleFFT, s*innerSize}); // offset selects the s-th block of innerSize values
+ }
+ plan.push_back({StepType::twiddles, 0});
+ plan.push_back({finalStep, 0});
+
+ if (finalStep == StepType::finalOrderN) { // the generic final pass needs scratch space and a table of outerSize unit-magnitude twists
+ dftTmp.resize(outerSize);
+ dftTwists.resize(outerSize);
+ for (size_t s = 0; s < outerSize; ++s) {
+ Sample dftPhase = Sample(-2*M_PI*s/outerSize);
+ dftTwists[s] = std::polar(Sample(1), dftPhase);
+ }
+ }
+ }
+ }
+
+ size_t size() const { // total number of points: the inner power-of-2 size times the outer factor
+ return innerSize*outerSize;
+ }
+ size_t steps() const { // number of entries in the plan, i.e. how many calls to the stepwise fft()/ifft() overloads make one full transform
+ return plan.size();
+ }
+
+ void fft(const Complex *time, Complex *freq) { // full transform on interleaved complex data: runs every plan step in order
+ for (auto &step : plan) {
+ fftStep(step, time, freq);
+ }
+ }
+ void fft(size_t step, const Complex *time, Complex *freq) { // runs a single plan step; `step` indexes the plan and is not bounds-checked
+ fftStep(plan[step], time, freq);
+ }
+ void fft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // full transform on split real/imaginary arrays
+ for (auto &step : plan) {
+ fftStep(step, inR, inI, outR, outI);
+ }
+ }
+ void fft(size_t step, const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // single plan step, split real/imaginary arrays
+ fftStep(plan[step], inR, inI, outR, outI);
+ }
+
+ void ifft(const Complex *freq, Complex *time) { // NOTE(review): as written this makes the same fftStep call as fft(); the direction is presumably chosen by an explicit `inverse` template argument that is not visible in this text - confirm
+ for (auto &step : plan) {
+ fftStep(step, freq, time);
+ }
+ }
+ void ifft(size_t step, const Complex *freq, Complex *time) { // single plan step of the inverse transform, interleaved complex data
+ fftStep(plan[step], freq, time);
+ }
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // inverse transform on split real/imaginary arrays
+ for (auto &step : plan) {
+ fftStep(step, inR, inI, outR, outI);
+ }
+ }
+ void ifft(size_t step, const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // single plan step of the inverse transform, split arrays
+ fftStep(plan[step], inR, inI, outR, outI);
+ }
+private:
+ using InnerFFT = Pow2FFT;
+ InnerFFT innerFFT; // power-of-2 FFT of length innerSize, reused for every inner transform
+
+ size_t innerSize, outerSize; // size() == innerSize*outerSize
+ std::vector tmpFreq; // scratch holding the interleave-copied input; the split-complex path reinterprets it as two Sample arrays
+ std::vector outerTwiddles; // twiddles applied between the inner FFTs and the final pass (rows s = 1..outerSize-1)
+ std::vector outerTwiddlesR, outerTwiddlesI; // the same twiddles as separate real/imaginary arrays
+ std::vector dftTwists, dftTmp; // only sized when the plan ends with finalOrderN
+
+ enum class StepType {
+ passthrough, // the whole transform is a single inner FFT
+ interleaveOrder2, interleaveOrder3, interleaveOrder4, interleaveOrder5, interleaveOrderN, // copy the input into tmpFreq via interleaveCopy
+ firstFFT, middleFFT, // inner FFT at offset 0 / at Step::offset
+ twiddles, // multiply everything after the first inner block by outerTwiddles
+ finalOrder2, finalOrder3, finalOrder4, finalOrder5, finalOrderN // outer pass across the inner blocks
+ };
+ struct Step {
+ StepType type;
+ size_t offset; // element offset into the buffers; only read by the middleFFT step
+ };
+ std::vector plan; // ordered steps built by resize()
+
+ template
+ void fftStep(Step step, const Complex *time, Complex *freq) { // executes one plan step on interleaved complex data; `inverse` selects the direction
+ switch (step.type) {
+ case (StepType::passthrough): { // single inner FFT straight from input to output
+ if (inverse) {
+ innerFFT.ifft(time, freq);
+ } else {
+ innerFFT.fft(time, freq);
+ }
+ break;
+ }
+ case (StepType::interleaveOrder2): { // the interleave steps copy the input into tmpFreq, with the outer size fixed at compile time where possible
+ _impl::interleaveCopy<2>(time, tmpFreq.data(), innerSize);
+ break;
+ }
+ case (StepType::interleaveOrder3): {
+ _impl::interleaveCopy<3>(time, tmpFreq.data(), innerSize);
+ break;
+ }
+ case (StepType::interleaveOrder4): {
+ _impl::interleaveCopy<4>(time, tmpFreq.data(), innerSize);
+ break;
+ }
+ case (StepType::interleaveOrder5): {
+ _impl::interleaveCopy<5>(time, tmpFreq.data(), innerSize);
+ break;
+ }
+ case (StepType::interleaveOrderN): { // generic version taking outerSize at run time
+ _impl::interleaveCopy(time, tmpFreq.data(), outerSize, innerSize);
+ break;
+ }
+ case (StepType::firstFFT): { // inner FFT of the first block of tmpFreq into the start of the output
+ if (inverse) {
+ innerFFT.ifft(tmpFreq.data(), freq);
+ } else {
+ innerFFT.fft(tmpFreq.data(), freq);
+ }
+ break;
+ }
+ case (StepType::middleFFT): { // inner FFT of the block at step.offset, written to the same offset in the output
+ Complex *offsetOut = freq + step.offset;
+ if (inverse) {
+ innerFFT.ifft(tmpFreq.data() + step.offset, offsetOut);
+ } else {
+ innerFFT.fft(tmpFreq.data() + step.offset, offsetOut);
+ }
+ break;
+ }
+ case (StepType::twiddles): { // in-place multiply of every block after the first; the inverse uses the conjugate-multiply helper
+ if (inverse) {
+ _impl::complexMulConj(freq + innerSize, freq + innerSize, outerTwiddles.data(), innerSize*(outerSize - 1));
+ } else {
+ _impl::complexMul(freq + innerSize, freq + innerSize, outerTwiddles.data(), innerSize*(outerSize - 1));
+ }
+ break;
+ }
+ case StepType::finalOrder2: // the final passes work in place on the output buffer
+ finalPass2(freq);
+ break;
+ case StepType::finalOrder3:
+ finalPass3(freq);
+ break;
+ case StepType::finalOrder4:
+ finalPass4(freq);
+ break;
+ case StepType::finalOrder5:
+ finalPass5(freq);
+ break;
+ case StepType::finalOrderN:
+ finalPassN(freq);
+ break;
+ }
+ }
+ template
+ void fftStep(Step step, const Sample *inR, const Sample *inI, Sample *outR, Sample *outI) { // split-complex counterpart: executes one plan step on separate real/imaginary arrays
+ Sample *tmpR = (Sample *)tmpFreq.data(), *tmpI = tmpR + tmpFreq.size(); // reuse the complex scratch as two Sample arrays: first half real, second half imaginary
+ switch (step.type) {
+ case (StepType::passthrough): { // single inner FFT straight from input to output
+ if (inverse) {
+ innerFFT.ifft(inR, inI, outR, outI);
+ } else {
+ innerFFT.fft(inR, inI, outR, outI);
+ }
+ break;
+ }
+ case (StepType::interleaveOrder2): { // real and imaginary arrays are interleave-copied separately
+ _impl::interleaveCopy<2>(inR, tmpR, innerSize);
+ _impl::interleaveCopy<2>(inI, tmpI, innerSize);
+ break;
+ }
+ case (StepType::interleaveOrder3): {
+ _impl::interleaveCopy<3>(inR, tmpR, innerSize);
+ _impl::interleaveCopy<3>(inI, tmpI, innerSize);
+ break;
+ }
+ case (StepType::interleaveOrder4): {
+ _impl::interleaveCopy<4>(inR, tmpR, innerSize);
+ _impl::interleaveCopy<4>(inI, tmpI, innerSize);
+ break;
+ }
+ case (StepType::interleaveOrder5): {
+ _impl::interleaveCopy<5>(inR, tmpR, innerSize);
+ _impl::interleaveCopy<5>(inI, tmpI, innerSize);
+ break;
+ }
+ case (StepType::interleaveOrderN): { // generic version handles both arrays in one call
+ _impl::interleaveCopy(inR, inI, tmpR, tmpI, outerSize, innerSize);
+ break;
+ }
+ case (StepType::firstFFT): { // inner FFT of the first block of the scratch into the start of the output
+ if (inverse) {
+ innerFFT.ifft(tmpR, tmpI, outR, outI);
+ } else {
+ innerFFT.fft(tmpR, tmpI, outR, outI);
+ }
+ break;
+ }
+ case (StepType::middleFFT): { // inner FFT of the block at step.offset, written to the same offset in the output
+ size_t offset = step.offset;
+ Sample *offsetOutR = outR + offset;
+ Sample *offsetOutI = outI + offset;
+ if (inverse) {
+ innerFFT.ifft(tmpR + offset, tmpI + offset, offsetOutR, offsetOutI);
+ } else {
+ innerFFT.fft(tmpR + offset, tmpI + offset, offsetOutR, offsetOutI);
+ }
+ break;
+ }
+ case(StepType::twiddles): { // in-place multiply of every block after the first, using the split copies of the twiddles
+ auto *twiddlesR = outerTwiddlesR.data();
+ auto *twiddlesI = outerTwiddlesI.data();
+ if (inverse) {
+ _impl::complexMulConj(outR + innerSize, outI + innerSize, outR + innerSize, outI + innerSize, twiddlesR, twiddlesI, innerSize*(outerSize - 1));
+ } else {
+ _impl::complexMul(outR + innerSize, outI + innerSize, outR + innerSize, outI + innerSize, twiddlesR, twiddlesI, innerSize*(outerSize - 1));
+ }
+ break;
+ }
+ case StepType::finalOrder2: // the final passes work in place on the output arrays
+ finalPass2(outR, outI);
+ break;
+ case StepType::finalOrder3:
+ finalPass3(outR, outI);
+ break;
+ case StepType::finalOrder4:
+ finalPass4(outR, outI);
+ break;
+ case StepType::finalOrder5:
+ finalPass5(outR, outI);
+ break;
+ case StepType::finalOrderN:
+ finalPassN(outR, outI);
+ break;
+ }
+ }
+
+ void finalPass2(Complex *f0) { // in-place 2-point pass across the two inner blocks: sum goes to the first block, difference to the second
+ auto *f1 = f0 + innerSize;
+ for (size_t i = 0; i < innerSize; ++i) {
+ Complex a = f0[i], b = f1[i];
+ f0[i] = a + b;
+ f1[i] = a - b;
+ }
+ }
+ void finalPass2(Sample *f0r, Sample *f0i) { // the same pass on split real/imaginary arrays
+ auto *f1r = f0r + innerSize;
+ auto *f1i = f0i + innerSize;
+ for (size_t i = 0; i < innerSize; ++i) {
+ Sample ar = f0r[i], ai = f0i[i];
+ Sample br = f1r[i], bi = f1i[i];
+ f0r[i] = ar + br;
+ f0i[i] = ai + bi;
+ f1r[i] = ar - br;
+ f1i[i] = ai - bi;
+ }
+ }
+ template
+ void finalPass3(Complex *f0) { // in-place 3-point pass across the three inner blocks
+ auto *f1 = f0 + innerSize;
+ auto *f2 = f0 + innerSize*2;
+ const Complex tw1{Sample(-0.5), Sample(-std::sqrt(0.75)*(inverse ? -1 : 1))}; // (-1/2, -sqrt(3)/2), with the imaginary sign flipped for the inverse
+ for (size_t i = 0; i < innerSize; ++i) {
+ Complex a = f0[i], b = f1[i], c = f2[i];
+ Complex bc0 = b + c, bc1 = b - c; // sum pairs with the twiddle's real part, difference with its imaginary part
+ f0[i] = a + bc0;
+ f1[i] = {
+ a.real() + bc0.real()*tw1.real() - bc1.imag()*tw1.imag(),
+ a.imag() + bc0.imag()*tw1.real() + bc1.real()*tw1.imag()
+ };
+ f2[i] = { // same as f1 with the tw1.imag() terms negated
+ a.real() + bc0.real()*tw1.real() + bc1.imag()*tw1.imag(),
+ a.imag() + bc0.imag()*tw1.real() - bc1.real()*tw1.imag()
+ };
+ }
+ }
+ template
+ void finalPass3(Sample *f0r, Sample *f0i) { // the same pass on split real/imaginary arrays, with the b/c sums and differences expanded term by term
+ auto *f1r = f0r + innerSize;
+ auto *f1i = f0i + innerSize;
+ auto *f2r = f0r + innerSize*2;
+ auto *f2i = f0i + innerSize*2;
+ const Sample tw1r = -0.5, tw1i = -std::sqrt(0.75)*(inverse ? -1 : 1); // same twiddle as the interleaved version
+
+ for (size_t i = 0; i < innerSize; ++i) {
+ Sample ar = f0r[i], ai = f0i[i], br = f1r[i], bi = f1i[i], cr = f2r[i], ci = f2i[i];
+
+ f0r[i] = ar + br + cr;
+ f0i[i] = ai + bi + ci;
+ f1r[i] = ar + br*tw1r - bi*tw1i + cr*tw1r + ci*tw1i;
+ f1i[i] = ai + bi*tw1r + br*tw1i - cr*tw1i + ci*tw1r;
+ f2r[i] = ar + br*tw1r + bi*tw1i + cr*tw1r - ci*tw1i;
+ f2i[i] = ai + bi*tw1r - br*tw1i + cr*tw1i + ci*tw1r;
+ }
+ }
+ template
+ void finalPass4(Complex *f0) { // in-place 4-point pass across the four inner blocks
+ auto *f1 = f0 + innerSize;
+ auto *f2 = f0 + innerSize*2;
+ auto *f3 = f0 + innerSize*3;
+ for (size_t i = 0; i < innerSize; ++i) {
+ Complex a = f0[i], b = f1[i], c = f2[i], d = f3[i];
+
+ Complex ac0 = a + c, ac1 = a - c;
+ Complex bd0 = b + d, bd1 = inverse ? (b - d) : (d - b); // direction only changes the sign of this difference
+ Complex bd1i = {-bd1.imag(), bd1.real()}; // bd1 multiplied by i
+ f0[i] = ac0 + bd0;
+ f1[i] = ac1 + bd1i;
+ f2[i] = ac0 - bd0;
+ f3[i] = ac1 - bd1i;
+ }
+ }
+ template
+ void finalPass4(Sample *f0r, Sample *f0i) { // the same pass on split real/imaginary arrays
+ auto *f1r = f0r + innerSize;
+ auto *f1i = f0i + innerSize;
+ auto *f2r = f0r + innerSize*2;
+ auto *f2i = f0i + innerSize*2;
+ auto *f3r = f0r + innerSize*3;
+ auto *f3i = f0i + innerSize*3;
+ for (size_t i = 0; i < innerSize; ++i) {
+ Sample ar = f0r[i], ai = f0i[i], br = f1r[i], bi = f1i[i], cr = f2r[i], ci = f2i[i], dr = f3r[i], di = f3i[i];
+
+ Sample ac0r = ar + cr, ac0i = ai + ci;
+ Sample ac1r = ar - cr, ac1i = ai - ci;
+ Sample bd0r = br + dr, bd0i = bi + di;
+ Sample bd1r = br - dr, bd1i = bi - di; // here bd1i is the imaginary part of (b - d), not "bd1 times i" as in the interleaved version
+
+ f0r[i] = ac0r + bd0r;
+ f0i[i] = ac0i + bd0i;
+ f1r[i] = inverse ? (ac1r - bd1i) : (ac1r + bd1i); // the difference is always b - d here, so the direction picks the sign instead
+ f1i[i] = inverse ? (ac1i + bd1r) : (ac1i - bd1r);
+ f2r[i] = ac0r - bd0r;
+ f2i[i] = ac0i - bd0i;
+ f3r[i] = inverse ? (ac1r + bd1i) : (ac1r - bd1i);
+ f3i[i] = inverse ? (ac1i - bd1r) : (ac1i + bd1r);
+ }
+ }
+ template
+ void finalPass5(Complex *f0) { // in-place 5-point pass across the five inner blocks
+ auto *f1 = f0 + innerSize;
+ auto *f2 = f0 + innerSize*2;
+ auto *f3 = f0 + innerSize*3;
+ auto *f4 = f0 + innerSize*4;
+ const Sample tw1r = 0.30901699437494745; // cos(2*pi/5)
+ const Sample tw1i = -0.9510565162951535*(inverse ? -1 : 1); // -sin(2*pi/5), sign flipped for the inverse
+ const Sample tw2r = -0.8090169943749473; // cos(4*pi/5)
+ const Sample tw2i = -0.5877852522924732*(inverse ? -1 : 1); // -sin(4*pi/5), sign flipped for the inverse
+ for (size_t i = 0; i < innerSize; ++i) {
+ Complex a = f0[i], b = f1[i], c = f2[i], d = f3[i], e = f4[i];
+
+ Complex be0 = b + e, be1 = {e.imag() - b.imag(), b.real() - e.real()}; // (b - e)*i
+ Complex cd0 = c + d, cd1 = {d.imag() - c.imag(), c.real() - d.real()}; // (c - d)*i
+
+ Complex bcde01 = be0*tw1r + cd0*tw2r; // sums weighted by the twiddles' real parts
+ Complex bcde02 = be0*tw2r + cd0*tw1r;
+ Complex bcde11 = be1*tw1i + cd1*tw2i; // rotated differences weighted by the twiddles' imaginary parts
+ Complex bcde12 = be1*tw2i - cd1*tw1i;
+
+ f0[i] = a + be0 + cd0;
+ f1[i] = a + bcde01 + bcde11;
+ f2[i] = a + bcde02 + bcde12;
+ f3[i] = a + bcde02 - bcde12; // f3 and f4 reuse the f2 and f1 terms with the difference part negated
+ f4[i] = a + bcde01 - bcde11;
+ }
+ }
+ template
+ void finalPass5(Sample *f0r, Sample *f0i) { // the same pass on split real/imaginary arrays
+ auto *f1r = f0r + innerSize;
+ auto *f1i = f0i + innerSize;
+ auto *f2r = f0r + innerSize*2;
+ auto *f2i = f0i + innerSize*2;
+ auto *f3r = f0r + innerSize*3;
+ auto *f3i = f0i + innerSize*3;
+ auto *f4r = f0r + innerSize*4;
+ auto *f4i = f0i + innerSize*4;
+
+ const Sample tw1r = 0.30901699437494745; // same constants as the interleaved version
+ const Sample tw1i = -0.9510565162951535*(inverse ? -1 : 1);
+ const Sample tw2r = -0.8090169943749473;
+ const Sample tw2i = -0.5877852522924732*(inverse ? -1 : 1);
+ for (size_t i = 0; i < innerSize; ++i) {
+ Sample ar = f0r[i], ai = f0i[i], br = f1r[i], bi = f1i[i], cr = f2r[i], ci = f2i[i], dr = f3r[i], di = f3i[i], er = f4r[i], ei = f4i[i];
+
+ Sample be0r = br + er, be0i = bi + ei;
+ Sample be1r = ei - bi, be1i = br - er; // (b - e)*i
+ Sample cd0r = cr + dr, cd0i = ci + di;
+ Sample cd1r = di - ci, cd1i = cr - dr; // (c - d)*i
+
+ Sample bcde01r = be0r*tw1r + cd0r*tw2r, bcde01i = be0i*tw1r + cd0i*tw2r;
+ Sample bcde02r = be0r*tw2r + cd0r*tw1r, bcde02i = be0i*tw2r + cd0i*tw1r;
+ Sample bcde11r = be1r*tw1i + cd1r*tw2i, bcde11i = be1i*tw1i + cd1i*tw2i;
+ Sample bcde12r = be1r*tw2i - cd1r*tw1i, bcde12i = be1i*tw2i - cd1i*tw1i;
+
+ f0r[i] = ar + be0r + cd0r;
+ f0i[i] = ai + be0i + cd0i;
+ f1r[i] = ar + bcde01r + bcde11r;
+ f1i[i] = ai + bcde01i + bcde11i;
+ f2r[i] = ar + bcde02r + bcde12r;
+ f2i[i] = ai + bcde02i + bcde12i;
+ f3r[i] = ar + bcde02r - bcde12r;
+ f3i[i] = ai + bcde02i - bcde12i;
+ f4r[i] = ar + bcde01r - bcde11r;
+ f4i[i] = ai + bcde01i - bcde11i;
+ }
+ }
+
+ template
+ void finalPassN(Complex *f0) { // generic in-place outer pass: a direct outerSize-point DFT for each of the innerSize positions
+ for (size_t i = 0; i < innerSize; ++i) {
+ Complex *offsetFreq = f0 + i;
+ Complex sum = 0;
+ for (size_t i2 = 0; i2 < outerSize; ++i2) { // copy this position's values into dftTmp (they are overwritten below) while summing them for output 0
+ sum += (dftTmp[i2] = offsetFreq[i2*innerSize]);
+ }
+ offsetFreq[0] = sum;
+
+ for (size_t f = 1; f < outerSize; ++f) {
+ Complex sum = dftTmp[0]; // the i2 == 0 term has no twist
+
+ for (size_t i2 = 1; i2 < outerSize; ++i2) {
+ size_t twistIndex = (i2*f)%outerSize; // wrap the product back into the outerSize-entry table
+ Complex twist = inverse ? std::conj(dftTwists[twistIndex]) : dftTwists[twistIndex];
+ sum += Complex{ // dftTmp[i2] * twist, written out by components
+ dftTmp[i2].real()*twist.real() - dftTmp[i2].imag()*twist.imag(),
+ dftTmp[i2].imag()*twist.real() + dftTmp[i2].real()*twist.imag()
+ };
+ }
+
+ offsetFreq[f*innerSize] = sum;
+ }
+ }
+ }
+ template
+ void finalPassN(Sample *f0r, Sample *f0i) { // the same pass on split real/imaginary arrays
+ Sample *tmpR = (Sample *)dftTmp.data(), *tmpI = tmpR + outerSize; // reuse the complex scratch as two Sample arrays of outerSize each
+
+ for (size_t i = 0; i < innerSize; ++i) {
+ Sample *offsetR = f0r + i;
+ Sample *offsetI = f0i + i;
+ Sample sumR = 0, sumI = 0;
+ for (size_t i2 = 0; i2 < outerSize; ++i2) { // copy this position's values into the scratch while summing them for output 0
+ sumR += (tmpR[i2] = offsetR[i2*innerSize]);
+ sumI += (tmpI[i2] = offsetI[i2*innerSize]);
+ }
+ offsetR[0] = sumR;
+ offsetI[0] = sumI;
+
+ for (size_t f = 1; f < outerSize; ++f) {
+ Sample sumR = *tmpR, sumI = *tmpI; // the i2 == 0 term has no twist
+
+ for (size_t i2 = 1; i2 < outerSize; ++i2) {
+ size_t twistIndex = (i2*f)%outerSize;
+ Complex twist = inverse ? std::conj(dftTwists[twistIndex]) : dftTwists[twistIndex];
+ sumR += tmpR[i2]*twist.real() - tmpI[i2]*twist.imag();
+ sumI += tmpI[i2]*twist.real() + tmpR[i2]*twist.imag();
+ }
+
+ offsetR[f*innerSize] = sumR;
+ offsetI[f*innerSize] = sumI;
+ }
+ }
+ }
+};
+
+template
+using FFT = SplitFFT;
+
+// Wraps a complex FFT into a real one: the real signal is transformed by a full-size complex FFT and only the first half of the spectrum is kept
+template>
+struct SimpleRealFFT {
+ using Complex = std::complex;
+
+ static constexpr bool prefersSplit = ComplexFFT::prefersSplit;
+
+ SimpleRealFFT(size_t size=0) { // size is the number of real samples
+ resize(size);
+ }
+
+ void resize(size_t size) { // the wrapped FFT and both scratch buffers all use the full size
+ complexFft.resize(size);
+ tmpTime.resize(size);
+ tmpFreq.resize(size);
+ }
+
+ void fft(const Sample *time, Complex *freq) { // forward transform: reads size samples, writes size/2 bins
+ for (size_t i = 0; i < tmpTime.size(); ++i) { // widen the real input to complex
+ tmpTime[i] = time[i];
+ }
+ complexFft.fft(tmpTime.data(), tmpFreq.data());
+ for (size_t i = 0; i < tmpFreq.size()/2; ++i) {
+ freq[i] = tmpFreq[i];
+ }
+ freq[0] = { // bin 0 packs two real values: the real part of bin 0 and the real part of bin size/2
+ tmpFreq[0].real(),
+ tmpFreq[tmpFreq.size()/2].real()
+ };
+ }
+ void fft(const Sample *inR, Sample *outR, Sample *outI) { // forward transform with split real/imaginary output arrays
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + tmpFreq.size(); // reuse the complex scratch as two Sample arrays
+ for (size_t i = 0; i < tmpTime.size()/2; ++i) { // size/2 complex zeros cover the Samples used as the all-zero imaginary input (exactly `size` of them when size is even)
+ tmpTime[i] = 0;
+ }
+ complexFft.fft(inR, (const Sample *)tmpTime.data(), tmpFreqR, tmpFreqI);
+ for (size_t i = 0; i < tmpTime.size()/2; ++i) {
+ outR[i] = tmpFreqR[i];
+ outI[i] = tmpFreqI[i];
+ }
+ outI[0] = tmpFreqR[tmpFreq.size()/2]; // same packing as above: the real part of bin size/2 goes into the imaginary slot of bin 0
+ }
+
+ void ifft(const Complex *freq, Sample *time) { // inverse transform: reads size/2 packed bins, writes size samples
+ tmpFreq[0] = freq[0].real(); // unpack bin 0 and bin size/2 from the two halves of freq[0]
+ tmpFreq[tmpFreq.size()/2] = freq[0].imag();
+ for (size_t i = 1; i < tmpFreq.size()/2; ++i) { // rebuild the upper half of the spectrum as the conjugate mirror of the lower half
+ tmpFreq[i] = freq[i];
+ tmpFreq[tmpFreq.size() - i] = std::conj(freq[i]);
+ }
+ complexFft.ifft(tmpFreq.data(), tmpTime.data());
+ for (size_t i = 0; i < tmpTime.size(); ++i) { // keep only the real part of the result
+ time[i] = tmpTime[i].real();
+ }
+ }
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR) { // inverse transform with split real/imaginary input arrays
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + tmpFreq.size();
+ tmpFreqR[0] = inR[0];
+ tmpFreqR[tmpFreq.size()/2] = inI[0]; // inI[0] carries the packed real value for bin size/2
+ tmpFreqI[0] = 0;
+ tmpFreqI[tmpFreq.size()/2] = 0;
+ for (size_t i = 1; i < tmpFreq.size()/2; ++i) { // conjugate mirror, as in the interleaved version
+ tmpFreqR[i] = inR[i];
+ tmpFreqI[i] = inI[i];
+ tmpFreqR[tmpFreq.size() - i] = inR[i];
+ tmpFreqI[tmpFreq.size() - i] = -inI[i];
+ }
+ complexFft.ifft(tmpFreqR, tmpFreqI, outR, (Sample *)tmpTime.data()); // the imaginary output is written to scratch and not used afterwards
+ }
+
+private:
+ ComplexFFT complexFft;
+ std::vector tmpTime, tmpFreq; // complex scratch buffers, each `size` long
+};
+
+/// A default power-of-2 FFT, specialised with platform-specific fast implementations where available. This generic version inherits everything from SimpleRealFFT.
+template
+struct Pow2RealFFT : public SimpleRealFFT {
+ static constexpr bool prefersSplit = SimpleRealFFT::prefersSplit;
+
+ using SimpleRealFFT::SimpleRealFFT; // inherit the base-class constructors
+
+ // Prevent copying, since it might be a problem for specialisations
+ Pow2RealFFT(const Pow2RealFFT &other) = delete;
+ // Pass move-constructor through, just to be explicit about it
+ Pow2RealFFT(Pow2RealFFT &&other) : SimpleRealFFT(std::move(other)) {}
+};
+
+/// A Real FFT which can handle multiples of 3 and 5, and can be computed in chunks
+template
+struct RealFFT {
+ using Complex = std::complex;
+ static constexpr bool prefersSplit = SplitFFT::prefersSplit;
+
+ static size_t fastSizeAbove(size_t size) { // applies the complex FFT's rounding to half the size (rounded up), then doubles it, so the result is always even
+ return ComplexFFT::fastSizeAbove((size + 1)/2)*2;
+ }
+
+ RealFFT(size_t size=0) { // size is the number of real samples
+ resize(size);
+ }
+
+ /// Prepares for real FFTs of `size` samples, which are computed with a complex FFT of size/2 points.
+ /// Rebuilds the scratch buffers and the twiddle table, plus the per-sample twists when halfBinShift is set.
+ void resize(size_t size) {
+ size_t hSize = size/2;
+ complexFft.resize(hSize);
+ tmpFreq.resize(hSize);
+ tmpTime.resize(hSize);
+
+ twiddles.resize(hSize/2 + 1);
+
+ // twiddles always has at least one entry, so a zero size (e.g. the default constructor) would
+ // otherwise divide by zero below and pass a NaN or infinite angle to std::polar.
+ // For any non-zero size this divisor is `size` itself, so the computed phases are unchanged.
+ const size_t phaseSize = size ? size : 1;
+
+ if (!halfBinShift) {
+ for (size_t i = 0; i < twiddles.size(); ++i) {
+ Sample rotPhase = i*(-2*M_PI/phaseSize) - M_PI/2; // bake rotation by (-i) into twiddles
+ twiddles[i] = std::polar(Sample(1), rotPhase);
+ }
+ } else {
+ for (size_t i = 0; i < twiddles.size(); ++i) {
+ Sample rotPhase = (i + 0.5)*(-2*M_PI/phaseSize) - M_PI/2; // as above, but centred on the half-bin offset
+ twiddles[i] = std::polar(Sample(1), rotPhase);
+ }
+
+ // Per-sample twists applied to the packed time-domain data when halfBinShift is set.
+ // This loop does not run when size is 0 (hSize == 0), so it needs no guard.
+ halfBinTwists.resize(hSize);
+ for (size_t i = 0; i < hSize; ++i) {
+ Sample twistPhase = -2*M_PI*i/size;
+ halfBinTwists[i] = std::polar(Sample(1), twistPhase);
+ }
+ }
+ }
+
+ size_t size() const { // number of real samples: twice the size of the internal complex FFT
+ return complexFft.size()*2;
+ }
+ size_t steps() const { // the complex FFT's steps, plus one packing step and one twiddle step (two when splitComputation halves the twiddle work)
+ return complexFft.steps() + (splitComputation ? 3 : 2);
+ }
+
+ void fft(const Sample *time, Complex *freq) { // full forward transform: runs every step in order
+ for (size_t s = 0; s < steps(); ++s) {
+ fft(s, time, freq);
+ }
+ }
+ void fft(size_t step, const Sample *time, Complex *freq) { // one step of the forward transform: step 0 packs the input, the next complexFft.steps() run the half-size complex FFT, the remainder untangle the result into `freq`
+ if (complexPrefersSplit) { // use split real/imaginary scratch when the complex FFT prefers it
+ size_t hSize = complexFft.size();
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize; // reinterpret each complex scratch buffer as two Sample arrays of hSize
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
+ if (step-- == 0) { // the post-decrement re-bases `step` for the later branches
+ size_t hSize = complexFft.size(); // re-declared; shadows the outer hSize with the same value
+ if (halfBinShift) { // pack sample pairs as complex values and rotate each by its twist
+ for (size_t i = 0; i < hSize; ++i) {
+ Sample tr = time[2*i], ti = time[2*i + 1];
+ Complex twist = halfBinTwists[i];
+ tmpTimeR[i] = tr*twist.real() - ti*twist.imag();
+ tmpTimeI[i] = ti*twist.real() + tr*twist.imag();
+ }
+ } else { // even-indexed samples become real parts, odd-indexed samples imaginary parts
+ for (size_t i = 0; i < hSize; ++i) {
+ tmpTimeR[i] = time[2*i];
+ tmpTimeI[i] = time[2*i + 1];
+ }
+ }
+ } else if (step < complexFft.steps()) { // one step of the half-size complex FFT
+ complexFft.fft(step, tmpTimeR, tmpTimeI, tmpFreqR, tmpFreqI);
+ } else {
+ if (!halfBinShift) { // pack DC & Nyquist together, as in the interleaved branch below
+ Sample bin0r = tmpFreqR[0], bin0i = tmpFreqI[0];
+ freq[0] = {bin0r + bin0i, bin0r - bin0i};
+ }
+
+ size_t startI = halfBinShift ? 0 : 1; // bin 0 was handled above unless half-bin shifted
+ size_t endI = hSize/2 + 1;
+ if (splitComputation) { // Do this last twiddle in two halves
+ if (step == complexFft.steps()) {
+ endI = (startI + endI)/2;
+ } else {
+ startI = (startI + endI)/2;
+ }
+ }
+ for (size_t i = startI; i < endI; ++i) {
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i); // mirror bin paired with bin i
+ Complex twiddle = twiddles[i];
+
+ Sample oddR = (tmpFreqR[i] + tmpFreqR[conjI])*Sample(0.5); // half-sum and half-difference of bin i and the conjugate of its mirror
+ Sample oddI = (tmpFreqI[i] - tmpFreqI[conjI])*Sample(0.5);
+ Sample evenIR = (tmpFreqR[i] - tmpFreqR[conjI])*Sample(0.5);
+ Sample evenII = (tmpFreqI[i] + tmpFreqI[conjI])*Sample(0.5);
+ Sample evenRotMinusIR = evenIR*twiddle.real() - evenII*twiddle.imag(); // complex multiply by the twiddle
+ Sample evenRotMinusII = evenII*twiddle.real() + evenIR*twiddle.imag();
+
+ freq[i] = {oddR + evenRotMinusIR, oddI + evenRotMinusII};
+ freq[conjI] = {oddR - evenRotMinusIR, evenRotMinusII - oddI}; // mirror bin: conj(odd - evenRotMinusI)
+ }
+ }
+ } else {
+ bool canUseTime = !halfBinShift && !(size_t(time)%alignof(Complex)); // the input can be read in place as Complex only if it is unmodified and suitably aligned
+ if (step-- == 0) { // the post-decrement re-bases `step` for the later branches
+ size_t hSize = complexFft.size();
+ if (halfBinShift) { // pack sample pairs as complex values and rotate each by its twist
+ for (size_t i = 0; i < hSize; ++i) {
+ Sample tr = time[2*i], ti = time[2*i + 1];
+ Complex twist = halfBinTwists[i];
+ tmpTime[i] = {
+ tr*twist.real() - ti*twist.imag(),
+ ti*twist.real() + tr*twist.imag()
+ };
+ }
+ } else if (!canUseTime) { // misaligned input: copy the sample pairs into complex scratch
+ std::memcpy(tmpTime.data(), time, sizeof(Complex)*hSize);
+ }
+ } else if (step < complexFft.steps()) { // one step of the half-size complex FFT, reading `time` directly when allowed
+ complexFft.fft(step, canUseTime ? (const Complex *)time : tmpTime.data(), tmpFreq.data());
+ } else {
+ if (!halfBinShift) {
+ Complex bin0 = tmpFreq[0];
+ freq[0] = { // pack DC & Nyquist together
+ bin0.real() + bin0.imag(),
+ bin0.real() - bin0.imag()
+ };
+ }
+
+ size_t hSize = complexFft.size();
+ size_t startI = halfBinShift ? 0 : 1; // bin 0 was handled above unless half-bin shifted
+ size_t endI = hSize/2 + 1;
+ if (splitComputation) { // Do this last twiddle in two halves
+ if (step == complexFft.steps()) {
+ endI = (startI + endI)/2;
+ } else {
+ startI = (startI + endI)/2;
+ }
+ }
+ for (size_t i = startI; i < endI; ++i) {
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i); // mirror bin paired with bin i
+ Complex twiddle = twiddles[i];
+
+ Complex odd = (tmpFreq[i] + std::conj(tmpFreq[conjI]))*Sample(0.5);
+ Complex evenI = (tmpFreq[i] - std::conj(tmpFreq[conjI]))*Sample(0.5);
+ Complex evenRotMinusI = { // twiddle includes a factor of -i
+ evenI.real()*twiddle.real() - evenI.imag()*twiddle.imag(),
+ evenI.imag()*twiddle.real() + evenI.real()*twiddle.imag()
+ };
+
+ freq[i] = odd + evenRotMinusI;
+ freq[conjI] = {odd.real() - evenRotMinusI.real(), evenRotMinusI.imag() - odd.imag()}; // mirror bin: conj(odd - evenRotMinusI)
+ }
+ }
+ }
+ }
+ void fft(const Sample *inR, Sample *outR, Sample *outI) { // full forward transform with split real/imaginary output
+ for (size_t s = 0; s < steps(); ++s) {
+ fft(s, inR, outR, outI);
+ }
+ }
+ void fft(size_t step, const Sample *inR, Sample *outR, Sample *outI) { // one step of the forward transform; always uses the split scratch layout
+ size_t hSize = complexFft.size();
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize; // reinterpret each complex scratch buffer as two Sample arrays of hSize
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
+ if (step-- == 0) { // the post-decrement re-bases `step` for the later branches
+ size_t hSize = complexFft.size(); // re-declared; shadows the outer hSize with the same value
+ if (halfBinShift) { // pack sample pairs as complex values and rotate each by its twist
+ for (size_t i = 0; i < hSize; ++i) {
+ Sample tr = inR[2*i], ti = inR[2*i + 1];
+ Complex twist = halfBinTwists[i];
+ tmpTimeR[i] = tr*twist.real() - ti*twist.imag();
+ tmpTimeI[i] = ti*twist.real() + tr*twist.imag();
+ }
+ } else { // even-indexed samples become real parts, odd-indexed samples imaginary parts
+ for (size_t i = 0; i < hSize; ++i) {
+ tmpTimeR[i] = inR[2*i];
+ tmpTimeI[i] = inR[2*i + 1];
+ }
+ }
+ } else if (step < complexFft.steps()) { // one step of the half-size complex FFT
+ complexFft.fft(step, tmpTimeR, tmpTimeI, tmpFreqR, tmpFreqI);
+ } else {
+ if (!halfBinShift) { // bin 0 packs two real values: sum and difference of the complex bin's parts
+ Sample bin0r = tmpFreqR[0], bin0i = tmpFreqI[0];
+ outR[0] = bin0r + bin0i;
+ outI[0] = bin0r - bin0i;
+ }
+
+ size_t startI = halfBinShift ? 0 : 1; // bin 0 was handled above unless half-bin shifted
+ size_t endI = hSize/2 + 1;
+ if (splitComputation) { // Do this last twiddle in two halves
+ if (step == complexFft.steps()) {
+ endI = (startI + endI)/2;
+ } else {
+ startI = (startI + endI)/2;
+ }
+ }
+ for (size_t i = startI; i < endI; ++i) {
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i); // mirror bin paired with bin i
+ Complex twiddle = twiddles[i];
+
+ Sample oddR = (tmpFreqR[i] + tmpFreqR[conjI])*Sample(0.5); // half-sum and half-difference of bin i and the conjugate of its mirror
+ Sample oddI = (tmpFreqI[i] - tmpFreqI[conjI])*Sample(0.5);
+ Sample evenIR = (tmpFreqR[i] - tmpFreqR[conjI])*Sample(0.5);
+ Sample evenII = (tmpFreqI[i] + tmpFreqI[conjI])*Sample(0.5);
+ Sample evenRotMinusIR = evenIR*twiddle.real() - evenII*twiddle.imag(); // complex multiply by the twiddle
+ Sample evenRotMinusII = evenII*twiddle.real() + evenIR*twiddle.imag();
+
+ outR[i] = oddR + evenRotMinusIR;
+ outI[i] = oddI + evenRotMinusII;
+ outR[conjI] = oddR - evenRotMinusIR; // mirror bin: conj(odd - evenRotMinusI)
+ outI[conjI] = evenRotMinusII - oddI;
+ }
+ }
+ }
+
+ void ifft(const Complex *freq, Sample *time) { // full inverse transform: runs every step in order
+ for (size_t s = 0; s < steps(); ++s) {
+ ifft(s, freq, time);
+ }
+ }
+ void ifft(size_t step, const Complex *freq, Sample *time) { // one step of the inverse transform: the first one or two steps rebuild the half-size complex spectrum, the next complexFft.steps() run the complex inverse FFT, the last unpacks into `time`
+ if (complexPrefersSplit) { // use split real/imaginary scratch when the complex FFT prefers it
+ size_t hSize = complexFft.size();
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize; // reinterpret each complex scratch buffer as two Sample arrays of hSize
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
+
+ bool splitFirst = splitComputation && (step-- == 0); // with splitComputation the twiddle work takes two steps, so `step` is re-based once here and once more below
+ if (splitFirst || step-- == 0) {
+ Complex bin0 = freq[0];
+ if (!halfBinShift) { // unpack the two real values stored in bin 0 (runs in both halves when split)
+ tmpFreqR[0] = bin0.real() + bin0.imag();
+ tmpFreqI[0] = bin0.real() - bin0.imag();
+ }
+ size_t startI = halfBinShift ? 0 : 1;
+ size_t endI = hSize/2 + 1;
+ if (splitComputation) { // Do this first twiddle in two halves
+ if (splitFirst) {
+ endI = (startI + endI)/2;
+ } else {
+ startI = (startI + endI)/2;
+ }
+ }
+ for (size_t i = startI; i < endI; ++i) {
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i); // mirror bin paired with bin i
+ Complex twiddle = twiddles[i];
+
+ Complex odd = freq[i] + std::conj(freq[conjI]); // no 0.5 factor here, unlike the forward transform
+ Complex evenRotMinusI = freq[i] - std::conj(freq[conjI]);
+ Complex evenI = { // Conjugate twiddle
+ evenRotMinusI.real()*twiddle.real() + evenRotMinusI.imag()*twiddle.imag(),
+ evenRotMinusI.imag()*twiddle.real() - evenRotMinusI.real()*twiddle.imag()
+ };
+
+ tmpFreqR[i] = odd.real() + evenI.real();
+ tmpFreqI[i] = odd.imag() + evenI.imag();
+ tmpFreqR[conjI] = odd.real() - evenI.real(); // mirror bin: conj(odd - evenI)
+ tmpFreqI[conjI] = evenI.imag() - odd.imag();
+ }
+ } else if (step < complexFft.steps()) { // one step of the half-size complex inverse FFT
+ complexFft.ifft(step, tmpFreqR, tmpFreqI, tmpTimeR, tmpTimeI);
+ } else {
+ size_t hSize = complexFft.size(); // re-declared; shadows the outer hSize with the same value
+ if (halfBinShift) { // multiply by the conjugate twist, then unpack to sample pairs
+ for (size_t i = 0; i < hSize; ++i) {
+ Sample tr = tmpTimeR[i], ti = tmpTimeI[i];
+ Complex twist = halfBinTwists[i];
+ time[2*i] = tr*twist.real() + ti*twist.imag();
+ time[2*i + 1] = ti*twist.real() - tr*twist.imag();
+ }
+ } else { // real parts become even-indexed samples, imaginary parts odd-indexed samples
+ for (size_t i = 0; i < hSize; ++i) {
+ time[2*i] = tmpTimeR[i];
+ time[2*i + 1] = tmpTimeI[i];
+ }
+ }
+ }
+ } else {
+ bool canUseTime = !halfBinShift && !(size_t(time)%alignof(Complex)); // the output can be written in place as Complex only if no twist is needed and it is suitably aligned
+ bool splitFirst = splitComputation && (step-- == 0); // same step re-basing as in the split branch above
+ if (splitFirst || step-- == 0) {
+ Complex bin0 = freq[0];
+ if (!halfBinShift) { // unpack the two real values stored in bin 0
+ tmpFreq[0] = {
+ bin0.real() + bin0.imag(),
+ bin0.real() - bin0.imag()
+ };
+ }
+ size_t hSize = complexFft.size();
+ size_t startI = halfBinShift ? 0 : 1;
+ size_t endI = hSize/2 + 1;
+ if (splitComputation) { // Do this first twiddle in two halves
+ if (splitFirst) {
+ endI = (startI + endI)/2;
+ } else {
+ startI = (startI + endI)/2;
+ }
+ }
+ for (size_t i = startI; i < endI; ++i) {
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i); // mirror bin paired with bin i
+ Complex twiddle = twiddles[i];
+
+ Complex odd = freq[i] + std::conj(freq[conjI]); // no 0.5 factor here, unlike the forward transform
+ Complex evenRotMinusI = freq[i] - std::conj(freq[conjI]);
+ Complex evenI = { // Conjugate twiddle
+ evenRotMinusI.real()*twiddle.real() + evenRotMinusI.imag()*twiddle.imag(),
+ evenRotMinusI.imag()*twiddle.real() - evenRotMinusI.real()*twiddle.imag()
+ };
+
+ tmpFreq[i] = odd + evenI;
+ tmpFreq[conjI] = {odd.real() - evenI.real(), evenI.imag() - odd.imag()}; // mirror bin: conj(odd - evenI)
+ }
+ } else if (step < complexFft.steps()) {
+ // Only write straight into `time` as (Complex *) when canUseTime says it is aligned properly; otherwise go via tmpTime
+ complexFft.ifft(step, tmpFreq.data(), canUseTime ? (Complex *)time : tmpTime.data());
+ } else {
+ size_t hSize = complexFft.size();
+ if (halfBinShift) { // multiply by the conjugate twist, then unpack to sample pairs
+ for (size_t i = 0; i < hSize; ++i) {
+ Complex t = tmpTime[i];
+ Complex twist = halfBinTwists[i];
+ time[2*i] = t.real()*twist.real() + t.imag()*twist.imag();
+ time[2*i + 1] = t.imag()*twist.real() - t.real()*twist.imag();
+ }
+ } else if (!canUseTime) { // the result went to scratch, so copy it out; otherwise it is already in `time`
+ std::memcpy(time, tmpTime.data(), sizeof(Complex)*hSize);
+ }
+ }
+ }
+ }
+ void ifft(const Sample *inR, const Sample *inI, Sample *outR) { // full inverse transform with split real/imaginary input
+ for (size_t s = 0; s < steps(); ++s) {
+ ifft(s, inR, inI, outR);
+ }
+ }
+ void ifft(size_t step, const Sample *inR, const Sample *inI, Sample *outR) { // one step of the inverse transform; always uses the split scratch layout
+ size_t hSize = complexFft.size();
+ Sample *tmpTimeR = (Sample *)tmpTime.data(), *tmpTimeI = tmpTimeR + hSize; // reinterpret each complex scratch buffer as two Sample arrays of hSize
+ Sample *tmpFreqR = (Sample *)tmpFreq.data(), *tmpFreqI = tmpFreqR + hSize;
+
+ bool splitFirst = splitComputation && (step-- == 0); // with splitComputation the twiddle work takes two steps, so `step` is re-based once here and once more below
+ if (splitFirst || step-- == 0) {
+ Sample bin0r = inR[0], bin0i = inI[0];
+ if (!halfBinShift) { // unpack the two real values stored in bin 0
+ tmpFreqR[0] = bin0r + bin0i;
+ tmpFreqI[0] = bin0r - bin0i;
+ }
+ size_t startI = halfBinShift ? 0 : 1;
+ size_t endI = hSize/2 + 1;
+ if (splitComputation) { // Do this first twiddle in two halves
+ if (splitFirst) {
+ endI = (startI + endI)/2;
+ } else {
+ startI = (startI + endI)/2;
+ }
+ }
+ for (size_t i = startI; i < endI; ++i) {
+ size_t conjI = halfBinShift ? (hSize - 1 - i) : (hSize - i); // mirror bin paired with bin i
+ Complex twiddle = twiddles[i];
+ Sample fir = inR[i], fii = inI[i];
+ Sample fcir = inR[conjI], fcii = inI[conjI];
+
+ Complex odd = {fir + fcir, fii - fcii}; // bin i plus the conjugate of its mirror (no 0.5 factor, unlike the forward transform)
+ Complex evenRotMinusI = {fir - fcir, fii + fcii}; // bin i minus the conjugate of its mirror
+ Complex evenI = { // Conjugate twiddle
+ evenRotMinusI.real()*twiddle.real() + evenRotMinusI.imag()*twiddle.imag(),
+ evenRotMinusI.imag()*twiddle.real() - evenRotMinusI.real()*twiddle.imag()
+ };
+
+ tmpFreqR[i] = odd.real() + evenI.real();
+ tmpFreqI[i] = odd.imag() + evenI.imag();
+ tmpFreqR[conjI] = odd.real() - evenI.real(); // mirror bin: conj(odd - evenI)
+ tmpFreqI[conjI] = evenI.imag() - odd.imag();
+ }
+ } else if (step < complexFft.steps()) {
+ // One step of the half-size complex inverse FFT, from the split frequency scratch into the split time scratch
+ complexFft.ifft(step, tmpFreqR, tmpFreqI, tmpTimeR, tmpTimeI);
+ } else {
+ if (halfBinShift) { // multiply by the conjugate twist, then unpack to sample pairs
+ for (size_t i = 0; i < hSize; ++i) {
+ Sample tr = tmpTimeR[i], ti = tmpTimeI[i];
+ Complex twist = halfBinTwists[i];
+ outR[2*i] = tr*twist.real() + ti*twist.imag();
+ outR[2*i + 1] = ti*twist.real() - tr*twist.imag();
+ }
+ } else { // real parts become even-indexed samples, imaginary parts odd-indexed samples
+ for (size_t i = 0; i < hSize; ++i) {
+ outR[2*i] = tmpTimeR[i];
+ outR[2*i + 1] = tmpTimeI[i];
+ }
+ }
+ }
+ }
+private:
+ using ComplexFFT = SplitFFT;
+ ComplexFFT complexFft;
+
+ static constexpr bool complexPrefersSplit = ComplexFFT::prefersSplit;
+ std::vector tmpFreq, tmpTime;
+ std::vector twiddles, halfBinTwists;
+};
+
+template
+using ModifiedRealFFT = RealFFT;
+
+}} // namespace
+
+// Override `Pow2FFT` / `Pow2RealFFT` templates with faster implementations
+#if defined(SIGNALSMITH_USE_PFFFT) || defined(SIGNALSMITH_USE_PFFFT_DOUBLE)
+# if defined(SIGNALSMITH_USE_PFFFT)
+# include "./platform/fft-pffft.h"
+# endif
+# if defined(SIGNALSMITH_USE_PFFFT_DOUBLE)
+# include "./platform/fft-pffft-double.h"
+# endif
+#elif defined(SIGNALSMITH_USE_ACCELERATE)
+# include "./platform/fft-accelerate.h"
+#elif defined(SIGNALSMITH_USE_IPP)
+# include "./platform/fft-ipp.h"
+#endif
+
+#endif // include guard
diff --git a/thirdparty/signalsmith-linear/include/signalsmith-linear/fft.h b/thirdparty/signalsmith-linear/include/signalsmith-linear/fft.h
new file mode 100644
index 0000000000..661709d42b
--- /dev/null
+++ b/thirdparty/signalsmith-linear/include/signalsmith-linear/fft.h
@@ -0,0 +1 @@
+#include "../../fft.h"
diff --git a/thirdparty/signalsmith-linear/include/signalsmith-linear/linear.h b/thirdparty/signalsmith-linear/include/signalsmith-linear/linear.h
new file mode 100644
index 0000000000..2b276f5cc9
--- /dev/null
+++ b/thirdparty/signalsmith-linear/include/signalsmith-linear/linear.h
@@ -0,0 +1 @@
+#include "../../linear.h"
diff --git a/thirdparty/signalsmith-linear/include/signalsmith-linear/stft.h b/thirdparty/signalsmith-linear/include/signalsmith-linear/stft.h
new file mode 100644
index 0000000000..9665aee3ab
--- /dev/null
+++ b/thirdparty/signalsmith-linear/include/signalsmith-linear/stft.h
@@ -0,0 +1 @@
+#include "../../stft.h"
diff --git a/thirdparty/signalsmith-linear/linear.h b/thirdparty/signalsmith-linear/linear.h
new file mode 100644
index 0000000000..9097ac7458
--- /dev/null
+++ b/thirdparty/signalsmith-linear/linear.h
@@ -0,0 +1,1075 @@
+#ifndef SIGNALSMITH_AUDIO_LINEAR_H
+#define SIGNALSMITH_AUDIO_LINEAR_H
+
+#if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099) && !defined(SIGNALSMITH_IGNORE_BROKEN_APPLECLANG)
+# error Apple Clang 16.0.0 generates incorrect SIMD for ARM. If you HAVE to use this version of Clang, turn off -ffast-math.
+#endif
+
+#ifndef M_PI
+# define M_PI 3.14159265358979323846
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace signalsmith { namespace linear {
+
+// Plain-pointer aliases used by the rest of this header.
+// Read-only pointer to values of type `V`
+template
+using ConstRealPointer = const V *;
+// Writable pointer to values of type `V`
+template
+using RealPointer = V *;
+
+// Read-only pointer to `std::complex` values
+template
+using ConstComplexPointer = const std::complex *;
+// Writable pointer to `std::complex` values
+template
+using ComplexPointer = std::complex *;
+
+// Read-only "split" complex pointer: the real and imaginary parts are reached through two separate pointers.
+template
+struct ConstSplitPointer {
+ ConstRealPointer real, imag;
+ ConstSplitPointer(ConstRealPointer real, ConstRealPointer imag) : real(real), imag(imag) {}
+
+ // Array-like access for convenience
+ // Reads index `i` through both pointers and returns the pair as a complex value (a copy, so it can't be written through)
+ const std::complex operator[](std::ptrdiff_t i) const {
+ return {real[i], imag[i]};
+ }
+};
+
+// Writable "split" complex pointer: the real and imaginary parts are reached through two separate pointers.
+// Indexing a non-const SplitPointer returns a `Value` proxy, which writes assignments back through both pointers.
+template
+struct SplitPointer {
+ using Complex = std::complex;
+
+ RealPointer real, imag;
+ SplitPointer(RealPointer real, RealPointer imag) : real(real), imag(imag) {}
+ // Read-only view of the same two pointers (const, so it is also usable on a const SplitPointer)
+ operator ConstSplitPointer() const {
+ return {real, imag};
+ }
+
+ // Array-like access for convenience
+ const Complex operator[](std::ptrdiff_t i) const {
+ return {real[i], imag[i]};
+ }
+
+ // Assignable (if not const) and converts to `std::complex`
+ // The `Complex` base holds a copy of the value, while `_real`/`_imag` refer to the actual storage; setters and assignment operators update both.
+ struct Value : public Complex {
+ Value(V &_real, V &_imag) : Complex(_real, _imag), _real(_real), _imag(_imag) {}
+
+ V real() const {
+ return _real;
+ }
+ void real(V v) {
+ _real = v;
+ Complex::real(v);
+ }
+ V imag() const {
+ return _imag;
+ }
+ void imag(V v) {
+ _imag = v;
+ Complex::imag(v);
+ }
+
+// Each operator is applied to the `Complex` base first, and then the *result* is written back to the referenced storage.
+// (Writing back `v.real()`/`v.imag()` would store the operand rather than the result for the compound operators, would not compile for scalar operands, and would read `v` after it has been forwarded.)
+#define LINEAR_SPLIT_POINTER_ASSIGNMENT_OP(OP) \
+ template \
+ Value & operator OP(Other &&v) { \
+ std::complex::operator OP(std::forward(v)); \
+ _real = Complex::real(); \
+ _imag = Complex::imag(); \
+ return *this; \
+ }
+ LINEAR_SPLIT_POINTER_ASSIGNMENT_OP(=);
+ LINEAR_SPLIT_POINTER_ASSIGNMENT_OP(+=);
+ LINEAR_SPLIT_POINTER_ASSIGNMENT_OP(-=);
+ LINEAR_SPLIT_POINTER_ASSIGNMENT_OP(*=);
+ LINEAR_SPLIT_POINTER_ASSIGNMENT_OP(/=);
+#undef LINEAR_SPLIT_POINTER_ASSIGNMENT_OP
+
+ private:
+ V &_real;
+ V &_imag;
+ };
+
+ // Writable access: the returned proxy refers to `real[i]` and `imag[i]`
+ Value operator[](std::ptrdiff_t i) {
+ return {real[i], imag[i]};
+ }
+};
+
+// Forward declarations: these are only declared here, and used by name further down this header.
+template
+struct LinearImpl;
+// NOTE(review): the template arguments selecting this `LinearImpl` instantiation are not visible in this copy - confirm against upstream
+using Linear = LinearImpl;
+
+// Everything we deal with is actually one of these
+template
+struct Expression;
+template
+struct WritableExpression;
+
+// Adds a static `name()` method returning `nameExpr` as a `std::string`.
+// The `Class` argument is not used by this definition (only by the commented-out `typeid` variant below).
+#define EXPRESSION_NAME(Class, nameExpr) \
+ static std::string name() {\
+ return nameExpr; \
+ }
+
+//#undef EXPRESSION_NAME
+//#include
+//#define EXPRESSION_NAME(Class, nameExpr) \
+// static std::string name() { \
+// return typeid(Class).name(); \
+// }
+
+// Expression templates, which always hold const pointers
+namespace expression {
+ // All base Exprs inherit from this, so we can SFINAE-test for them
+ struct Base {};
+ // Does nothing at runtime; it can only be called with something convertible to `const Base &` (presumably this is the SFINAE test mentioned above)
+ inline void mustBeExpr(const Base &) {}
+
+ // Expression which returns the same stored `value` for every index
+ template
+ struct ConstantExpr : public Base {
+ EXPRESSION_NAME(Constant, "V");
+ V value;
+
+ static_assert(std::is_trivially_copyable::value, "ConstantExpr values must be trivially copyable");
+
+ ConstantExpr(V value) : value(value) {}
+
+ // The index argument is ignored
+ const V get(std::ptrdiff_t) const {
+ return value;
+ }
+ };
+
+ // The type you get from applying `+` to the two `std::declval()` operands
+ // NOTE(review): the template parameters and `declval` arguments are not visible in this copy - confirm which types are being added
+ template
+ using Arithmetic = decltype(std::declval() + std::declval());
+
+ // If the constant is one of our assignable complex proxies, use the basic one instead
+ // Each of these specialisations adds nothing of its own: it derives from another `ConstantExpr` instantiation and inherits its constructors
+ template<>
+ struct ConstantExpr::Value> : public ConstantExpr> {
+ using ConstantExpr>::ConstantExpr;
+ };
+ template<>
+ struct ConstantExpr::Value> : public ConstantExpr> {
+ using ConstantExpr>::ConstantExpr;
+ };
+
+ // Decides how a value is turned into an expression.
+ // Primary template: `wrap()` stores the value in a `Constant` (a `ConstantExpr`)
+ template
+ struct ExprTest {
+ using Constant = ConstantExpr>;
+
+ static_assert(std::is_trivially_copyable::value, "ConstantExpr must be trivially copyable");
+
+ static Constant wrap(const V &v) {
+ return {v};
+ }
+ };
+ // Specialisation: `wrap()` returns the expression it was given, unchanged.
+ // It defines no `Constant` member, which is what makes the `Constant` alias below unavailable for these types.
+ template
+ struct ExprTest()))> {
+ static Expr wrap(const Expr &expr) {
+ return expr;
+ }
+ };
+ // Constant class, only defined for non-Expr types
+ template
+ using Constant = typename ExprTest::Constant;
+
+ // Returns whatever `ExprTest::wrap()` produces for the argument (see `ExprTest` for the two cases)
+ template
+ auto ensureExpr(const Expr &expr) -> decltype(ExprTest::wrap(expr)) {
+ return ExprTest::wrap(expr);
+ }
+
+ // Remove `Expression<>` or `WritableExpression<>` layers
+ // Every overload takes its argument by value and returns it as an `E`
+ template
+ E unwrapped(E e) {
+ return e;
+ }
+ template
+ E unwrapped(Expression e) {
+ return e;
+ }
+ template
+ E unwrapped(WritableExpression e) {
+ return e;
+ }
+ // The type `unwrapped()` returns for a given argument type
+ template
+ using Unwrapped = decltype(unwrapped(std::declval()));
+
+ // Expressions that just read from a pointer
+ // Real version: `get(i)` returns `pointer[i]`
+ template
+ struct ReadableReal : public Base {
+ EXPRESSION_NAME(ReadableReal, "const V*");
+ ConstRealPointer pointer;
+
+ ReadableReal(ConstRealPointer pointer) : pointer(pointer) {}
+
+ V get(std::ptrdiff_t i) const {
+ return pointer[i];
+ }
+ };
+ // Complex version of a read-only pointer expression: `get(i)` returns `pointer[i]` as a `std::complex`
+ template
+ struct ReadableComplex : public Base {
+ EXPRESSION_NAME(ReadableComplex, "const VC*");
+ ConstComplexPointer pointer;
+
+ ReadableComplex(ConstComplexPointer pointer) : pointer(pointer) {}
+
+ std::complex get(std::ptrdiff_t i) const {
+ return pointer[i];
+ }
+ };
+ // Split-complex version of a read-only pointer expression: `get(i)` combines `pointer.real[i]` and `pointer.imag[i]` into a `std::complex`
+ template
+ struct ReadableSplit : public Base {
+ EXPRESSION_NAME(ReadableSplit, "const VS*");
+ ConstSplitPointer pointer;
+
+ ReadableSplit(ConstSplitPointer pointer) : pointer(pointer) {}
+
+ std::complex get(std::ptrdiff_t i) const {
+ return {pointer.real[i], pointer.imag[i]};
+ }
+ };
+}
+
+// + - * / % ^ & | ~ ! = < > += -= *= /= %= ^= &= |= << >> >>= <<= == != <= >= <=>(since C++20) && || ++ -- , ->* -> ( ) [ ]
+
+// Defines everything needed for one prefix operator `OP`:
+// * `expression::Name`, which stores an operand expression `a` and whose `get(i)` returns `OP a.get(i)`
+// * `expression::make##Name(a)`, which builds one from its argument
+// * a free `operator OP` taking an `Expression` and returning the result wrapped in an `Expression`
+#define SIGNALSMITH_AUDIO_LINEAR_UNARY_PREFIX(Name, OP) \
+namespace expression { \
+ template \
+ struct Name : public Base { \
+ EXPRESSION_NAME(Name, (#Name "<") + A::name() + ">"); \
+ A a; \
+ Name(const A &a) : a(a) {} \
+ auto get(std::ptrdiff_t i) const -> decltype(OP a.get(i)) const { \
+ return OP a.get(i); \
+ } \
+ }; \
+ template \
+ Name> make##Name(A a) { \
+ return {a}; \
+ } \
+} \
+template \
+Expression> operator OP(const Expression &a) { \
+ return {a}; \
+}
+// Unary minus is the only prefix operator defined this way
+SIGNALSMITH_AUDIO_LINEAR_UNARY_PREFIX(Neg, -)
+// Two negatives cancel out: negating a `Neg` expression returns its stored operand `a`, instead of wrapping it in a second `Neg`
+template
+Expression operator-(const Expression> &expr) {
+ return expr.a;
+}
+#undef SIGNALSMITH_AUDIO_LINEAR_UNARY_PREFIX
+
+// Defines everything needed for one infix operator `OP`:
+// * `expression::Name`, which stores two operand expressions and whose `get(i)` returns `a.get(i) OP b.get(i)`
+// * `expression::make##Name(a, b)`, which builds one from its arguments
+// * three free `operator OP` overloads: (Expression, Expression), (Expression, other) and (other, Expression)
+// NOTE(review): the return types of the mixed overloads are partly missing in this copy - presumably the non-Expression operand gets wrapped as a `Constant`; confirm against upstream
+#define SIGNALSMITH_AUDIO_LINEAR_BINARY_INFIX(Name, OP) \
+namespace expression { \
+ template \
+ struct Name : public Base { \
+ EXPRESSION_NAME(Name, (#Name "<") + A::name() + "," + B::name() + ">"); \
+ A a; \
+ B b; \
+ Name(const A &a, const B &b) : a(a), b(b) {} \
+ auto get(std::ptrdiff_t i) const -> decltype(a.get(i) OP b.get(i)) const { \
+ return a.get(i) OP b.get(i); \
+ } \
+ }; \
+ template \
+ Name, Unwrapped> make##Name(A a, B b) { \
+ return {a, b}; \
+ } \
+} \
+template \
+const Expression> operator OP(const Expression &a, const Expression &b) { \
+ return {a, b}; \
+} \
+template \
+const Expression>> operator OP(const Expression &a, const B &b) { \
+ return {a, b}; \
+} \
+template \
+const Expression, B>> operator OP(const A &a, const Expression &b) { \
+ return {a, b}; \
+}
+// The four arithmetic operators
+SIGNALSMITH_AUDIO_LINEAR_BINARY_INFIX(Add, +)
+SIGNALSMITH_AUDIO_LINEAR_BINARY_INFIX(Mul, *)
+SIGNALSMITH_AUDIO_LINEAR_BINARY_INFIX(Sub, -)
+SIGNALSMITH_AUDIO_LINEAR_BINARY_INFIX(Div, /)
+#undef SIGNALSMITH_AUDIO_LINEAR_BINARY_INFIX
+
+namespace expression {
+// Defines one single-argument function expression:
+// * a forward declaration of `Name`, so that `make##Name` can be declared first
+// * `make##Name(a)`, which builds one from its argument
+// * `Name` itself, which stores an operand expression `a` and whose `get(i)` returns `func(a.get(i))`
+#define SIGNALSMITH_AUDIO_LINEAR_FUNC1(Name, func) \
+ template \
+ struct Name; \
+ template \
+ Name> make##Name(A a) { \
+ return {a}; \
+ } \
+ template \
+ struct Name : public Base { \
+ EXPRESSION_NAME(Name, (#Name "<") + A::name() + ">"); \
+ A a; \
+ Name(const A &a) : a(a) {} \
+ auto get(std::ptrdiff_t i) const -> decltype(func(a.get(i))) { \
+ return func(a.get(i)); \
+ } \
+ };
+
+ // Absolute value: `std::abs()` in general
+ template
+ A fastAbs(const A &a) {
+ return std::abs(a);
+ }
+ // Complex overload: `std::hypot()` of the real and imaginary parts
+ template
+ A fastAbs(const std::complex &a) {
+ return std::hypot(a.real(), a.imag());
+ }
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Abs, fastAbs)
+
+ // Norm: the value multiplied by itself in general
+ template
+ A fastNorm(const A &a) {
+ return a*a;
+ }
+ // Complex overload: real*real + imag*imag
+ template
+ A fastNorm(const std::complex &a) {
+ A real = a.real(), imag = a.imag();
+ return real*real + imag*imag;
+ }
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Norm, fastNorm)
+
+ // Single-argument functions
+ // Abs, Norm, Exp, Exp2, Log, Log2, Log10, Sqrt, Cbrt, Ceil, Floor, Trunc, Round, Conj, Real, Imag, Arg, Proj, Sin, Cos, Tan, Asin, Acos, Atan, Sinh, Cosh, Tanh, Asinh, Acosh, Atanh, Erf, Erfc, Tgamma, Lgamma
+
+ // .abs and .norm are handled above
+ // Everything else maps directly onto the `std::` function of the same (lower-case) name
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Exp, std::exp)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Exp2, std::exp2)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Log, std::log)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Log2, std::log2)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Log10, std::log10)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Sqrt, std::sqrt)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Cbrt, std::cbrt)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Ceil, std::ceil)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Floor, std::floor)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Trunc, std::trunc)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Round, std::round)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Conj, std::conj)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Real, std::real)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Imag, std::imag)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Arg, std::arg)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Proj, std::proj)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Sin, std::sin)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Cos, std::cos)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Tan, std::tan)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Asin, std::asin)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Acos, std::acos)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Atan, std::atan)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Sinh, std::sinh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Cosh, std::cosh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Tanh, std::tanh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Asinh, std::asinh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Acosh, std::acosh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Atanh, std::atanh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Erf, std::erf)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Erfc, std::erfc)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Tgamma, std::tgamma)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Lgamma, std::lgamma)
+#undef SIGNALSMITH_AUDIO_LINEAR_FUNC1
+
+// Defines one two-argument function expression:
+// * `common##Name(a, b)`, which casts both arguments to the type of `a + b` before calling `func`
+// * `Name`, which stores two operand expressions and whose `get(i)` returns `common##Name(a.get(i), b.get(i))`
+// * `make##Name(a, b)`, which builds one from its arguments
+#define SIGNALSMITH_AUDIO_LINEAR_FUNC2(Name, func) \
+ template \
+ auto common##Name(VA a, VB b) -> decltype(func((decltype(a + b))a, (decltype(a + b))b)) { \
+ return func((decltype(a + b))a, (decltype(a + b))b); \
+ } \
+ template \
+ struct Name : public Base { \
+ EXPRESSION_NAME(Name, (#Name "<") + A::name() + "," + B::name() + ">"); \
+ A a; \
+ B b; \
+ Name(const A &a, const B &b) : a(a), b(b) {} \
+ auto get(std::ptrdiff_t i) const -> decltype(common##Name(a.get(i), b.get(i))) const { \
+ return common##Name(a.get(i), b.get(i)); \
+ } \
+ }; \
+ template \
+ Name, Unwrapped> make##Name(A a, B b) { \
+ return {a, b}; \
+ }
+ // Min, Max, Dim, Pow, Atan2, Hypot, Copysign, Polar
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Max, std::fmax);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Min, std::fmin);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Dim, std::fdim);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Pow, std::pow);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Atan2, std::atan2);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Hypot, std::hypot);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Copysign, std::copysign);
+ SIGNALSMITH_AUDIO_LINEAR_FUNC2(Polar, std::polar);
+#undef SIGNALSMITH_AUDIO_LINEAR_FUNC2
+
+} // expression::
+
+// Wraps a base expression type, adding `operator[]` plus one method per single-argument function (`.abs()`, `.sqrt()`, ...).
+// Each of those methods returns a new wrapped expression built from `*this`.
+template
+struct Expression : public BaseExpr {
+ // Forwards all constructor arguments to the base expression
+ template
+ Expression(Args &&...args) : BaseExpr(std::forward(args)...) {
+ static_assert(std::is_trivially_copyable::value, "BaseExpr must be trivially copyable");
+ static_assert(std::is_trivially_copyable::value, "Expression<> must be trivially copyable");
+ }
+
+ // Evaluates the expression at index `i`.
+ // The `const` has to come before the trailing return type to make this a const member function: placed after `decltype(...)` it only qualifies the return type, and the `const Expression` results of the arithmetic operators couldn't be indexed.
+ auto operator[](std::ptrdiff_t i) const -> decltype(BaseExpr::get(i)) {
+ return BaseExpr::get(i);
+ }
+
+// One const method per function, returning the corresponding `expression::` type wrapped around this expression
+#define SIGNALSMITH_AUDIO_LINEAR_FUNC1(ExprName, methodName) \
+ const Expression> methodName() const { \
+ return {*this}; \
+ }
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Abs, abs)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Norm, norm)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Exp, exp)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Exp2, exp2)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Log, log)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Log2, log2)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Log10, log10)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Sqrt, sqrt)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Cbrt, cbrt)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Ceil, ceil)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Floor, floor)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Trunc, trunc)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Round, round)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Conj, conj)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Real, real)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Imag, imag)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Arg, arg)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Proj, proj)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Sin, sin)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Cos, cos)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Tan, tan)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Asin, asin)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Acos, acos)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Atan, atan)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Sinh, sinh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Cosh, cosh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Tanh, tanh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Asinh, asinh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Acosh, acosh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Atanh, atanh)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Erf, erf)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Erfc, erfc)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Tgamma, tgamma)
+ SIGNALSMITH_AUDIO_LINEAR_FUNC1(Lgamma, lgamma)
+#undef SIGNALSMITH_AUDIO_LINEAR_FUNC1
+};
+// An expression which can also be assigned to: assignment evaluates the right-hand side into this expression's storage using `linear.fill()`.
+// `linear`, `pointer` and `size` are all accessed through `this->`, so they are expected to be provided by the base expression.
+template
+struct WritableExpression : public Expression {
+ using Expression::Expression;
+
+ WritableExpression(const WritableExpression &other) = default;
+
+ // Assigning anything else: it's passed through `expression::ensureExpr()` first, and then the result is filled into our storage
+ template
+ WritableExpression & operator=(const Expr &expr) {
+ this->linear.fill(this->pointer, expression::ensureExpr(expr), this->size);
+ return *this;
+ }
+
+ // Copy-assignment also fills our storage from the other expression, instead of copying its members
+ WritableExpression & operator=(const WritableExpression &expr) {
+ this->linear.fill(this->pointer, expr, this->size);
+ return *this;
+ }
+// Compound assignment `x assignOp expr` is implemented as `x = x binaryOp expr`
+#define SIGNALSMITH_AUDIO_ASSIGNMENT_OP(assignOp, binaryOp) \
+ template\
+ WritableExpression & operator assignOp(const E &expr) { \
+ return *this = (*this) binaryOp expr; \
+ }
+ SIGNALSMITH_AUDIO_ASSIGNMENT_OP(+=, +);
+ SIGNALSMITH_AUDIO_ASSIGNMENT_OP(-=, -);
+ SIGNALSMITH_AUDIO_ASSIGNMENT_OP(*=, *);
+ SIGNALSMITH_AUDIO_ASSIGNMENT_OP(/=, /);
+#undef SIGNALSMITH_AUDIO_ASSIGNMENT_OP
+
+ // Use the pointer's `operator[]` instead of the expression
+ auto operator[](std::ptrdiff_t i) -> decltype(this->pointer[i]) {
+ return this->pointer[i];
+ }
+
+ // Same again for const access
+ auto operator[](std::ptrdiff_t i) const -> decltype(this->pointer[i]) {
+ return this->pointer[i];
+ }
+};
+
+/// Helper class for temporary storage
+///
+/// Chunks are handed out from one buffer set up by `.reserve()`. If that runs out, chunks are allocated individually instead (calling `allocationWarning` if it's set).
+/// The reserved buffer is owned by this object: it is freed on destruction, and copies get their own buffer.
+template
+struct Temporary {
+ // This is called if we don't have enough reserved space and end up allocating
+ std::function allocationWarning;
+
+ Temporary() = default;
+
+ // Copies get a fresh (unused) buffer of the same reserved size, so two objects never free the same pointer
+ Temporary(const Temporary &other) : allocationWarning(other.allocationWarning) {
+ if (other.buffer) reserve(size_t(other.end - other.alignedBuffer));
+ }
+
+ Temporary & operator=(const Temporary &other) {
+ if (this != &other) {
+ allocationWarning = other.allocationWarning;
+ fallbacks.clear();
+ if (other.buffer) {
+ reserve(size_t(other.end - other.alignedBuffer));
+ } else {
+ release();
+ }
+ }
+ return *this;
+ }
+
+ // Without this, the buffer allocated by .reserve() would never be freed
+ ~Temporary() {
+ release();
+ }
+
+ /// (Re)allocates the main buffer so that `size` items fit, starting at an aligned address.  Any previously-reserved buffer is freed.
+ void reserve(size_t size) {
+ // Free and null the pointers first, so we never hold a dangling pointer if an allocation throws
+ release();
+ buffer = new V[size];
+ alignedBuffer = nextAligned(buffer);
+ if (alignedBuffer != buffer) {
+ // Not aligned, so try again with enough spare items to skip forward to an aligned address
+ release();
+ buffer = new V[size + extraAlignmentItems];
+ alignedBuffer = nextAligned(buffer);
+ }
+ start = alignedBuffer;
+ end = alignedBuffer + size;
+ }
+
+ /// Makes the whole reserved buffer available again, and frees any fallback allocations
+ void clear() {
+ start = alignedBuffer;
+ fallbacks.resize(0);
+ fallbacks.shrink_to_fit();
+ }
+
+ // You should have one of these every time you're using the temporary storage
+ // On destruction, it rewinds the storage to where it was when this was constructed
+ struct Scoped {
+ Scoped(Temporary &temporary) : temporary(temporary), restoreStart(temporary.start), fallbackSize(temporary.fallbacks.size()) {}
+
+ ~Scoped() {
+ temporary.start = restoreStart;
+ temporary.fallbacks.resize(fallbackSize);
+ }
+
+ // Gets a chunk of `size` items
+ V * operator()(size_t size) {
+ return temporary.getChunk(size);
+ }
+
+ private:
+ Temporary &temporary;
+ V *restoreStart;
+ size_t fallbackSize;
+ };
+
+private:
+ static constexpr size_t extraAlignmentItems = alignBytes/sizeof(V);
+ // Rounds the address up to the next multiple of `alignBytes` (the mask only works if that's a power of two)
+ static V * nextAligned(V *ptr) {
+ return (V*)((size_t(ptr) + (alignBytes - 1))&~(alignBytes - 1));
+ }
+
+ // Frees the main buffer (if there is one) and resets every pointer into it
+ void release() {
+ delete[] buffer;
+ buffer = alignedBuffer = nullptr;
+ start = end = nullptr;
+ }
+
+ size_t depth = 0;
+ V *start = nullptr, *end = nullptr;
+ V *buffer = nullptr, *alignedBuffer = nullptr;
+
+ std::vector> fallbacks;
+
+ /// Valid until the next call to .clear() or .reserve()
+ V * getChunk(size_t size) {
+ V *result = start;
+ V *newStart = start + size;
+ if (newStart > end) {
+ // OK, actually we ran out of temporary space, so allocate
+ fallbacks.emplace_back(size + extraAlignmentItems);
+ result = nextAligned(fallbacks.back().data());
+ // but we're not happy about it. >:(
+ if (allocationWarning) allocationWarning(newStart - buffer);
+ }
+ start = nextAligned(newStart);
+ return result;
+ }
+};
+
+template
+struct CachedResults {
+ using TemporaryFloats = Temporary;
+ using TemporaryDoubles = Temporary;
+
+ template
+ using WritableReal = typename Linear::template WritableReal;
+ template
+ using WritableComplex = typename Linear::template WritableComplex;
+ template
+ using WritableSplit = typename Linear::template WritableSplit;
+
+ CachedResults(Linear &linear) : linear(linear) {}
+
+ struct ScopedFloat : public TemporaryFloats::Scoped {
+ ScopedFloat(Linear &linear, TemporaryFloats &temporary) : TemporaryFloats::Scoped(temporary), linear(linear) {}
+
+ template
+ ConstRealPointer real(Expr expr, size_t size) {
+ auto chunk = (*this)(size);
+ linear.fill(chunk, expr, size);
+ return chunk;
+ }
+ ConstRealPointer real(expression::ReadableReal expr, size_t) {
+ return expr.pointer;
+ }
+ ConstRealPointer real(WritableReal expr, size_t) {
+ return expr.pointer;
+ }
+ template
+ ConstRealPointer real(Expr expr, size_t size, RealPointer