diff --git a/sdp/challenger2_sdp.py b/sdp/challenger2_sdp.py new file mode 100644 index 0000000..3072c9e --- /dev/null +++ b/sdp/challenger2_sdp.py @@ -0,0 +1,120 @@ +import numpy as np +import pyfftw + + +class SLIDING_DOT_PRODUCT: + # https://stackoverflow.com/a/30615425/2955541 + def __init__(self): + self.m = 0 + self.n = 0 + self.shape = 0 + self.threads = 1 + + self.rfft_obj = None + self.irfft_obj = None + + self.store_obj = False + self.historical_obj = {} + + self.real_arr = pyfftw.empty_aligned(2**20, dtype=np.float64) + self.complex_arr = pyfftw.empty_aligned(2**20, dtype=np.complex128) + + def __call__(self, Q, T): + if T.shape[0] == self.n: + shape = self.shape + else: + shape = pyfftw.next_fast_len(T.shape[0]) + + self.n = T.shape[0] + self.m = Q.shape[0] + + if self.shape != shape: + # need to set rfft/irfft objects + self.shape = shape + rfft_irfft_objects = self.historical_obj.get(self.shape, None) + + if rfft_irfft_objects is not None: + self.rfft_obj = rfft_irfft_objects[0] + self.irfft_obj = rfft_irfft_objects[1] + else: + self.rfft_obj = pyfftw.builders.rfft( + pyfftw.empty_aligned(self.shape, dtype=np.float64), + n=self.shape, + overwrite_input=True, + avoid_copy=True, + threads=self.threads, + ) + + self.irfft_obj = pyfftw.builders.irfft( + pyfftw.empty_aligned(1 + self.shape // 2, dtype=np.complex128), + overwrite_input=True, + avoid_copy=True, + n=self.shape, + threads=self.threads, + ) + + if self.store_obj: # pragma: no cover + self.historical_obj[self.shape] = (self.rfft_obj, self.irfft_obj) + + rfft_shape = 1 + self.shape // 2 + if self.shape <= self.real_arr.shape[0]: + real_arr = self.real_arr[: self.shape] + complex_arr = self.complex_arr[:rfft_shape] + else: # pragma: no cover + real_arr = pyfftw.empty_aligned(self.shape, dtype=np.float64) + complex_arr = pyfftw.empty_aligned(rfft_shape, dtype=np.complex128) + + real_arr[: self.n] = T + real_arr[self.n :] = 0 + self.rfft_obj(real_arr) + complex_arr[:] = self.rfft_obj.output_array + + real_arr[: self.m] = Q[::-1] + real_arr[self.m :] = 0 + self.rfft_obj(real_arr) + complex_arr *= self.rfft_obj.output_array + + self.irfft_obj(complex_arr) + + out = self.irfft_obj.output_array[self.m - 1 : self.n] + + return out + + def create_reusable_objects(self, pmin=2, pmax=20): + for p in range(pmin, pmax + 1): + n = 2**p + rfft_obj = pyfftw.builders.rfft( + pyfftw.empty_aligned(n, dtype=np.float64), + overwrite_input=True, + avoid_copy=True, + n=n, + threads=self.threads, + ) + irfft_obj = pyfftw.builders.irfft( + pyfftw.empty_aligned(1 + n // 2, dtype=np.complex128), + overwrite_input=True, + avoid_copy=True, + n=n, + threads=self.threads, + ) + self.historical_obj[n] = (rfft_obj, irfft_obj) + + return + + +_sliding_dot_product = SLIDING_DOT_PRODUCT() + + +def setup(Q, T): + _sliding_dot_product.create_reusable_objects() + _sliding_dot_product(Q, T) + return + + +def sliding_dot_product(Q, T): + if len(Q) == len(T): + out = np.dot(Q, T) + else: + out = _sliding_dot_product(Q, T) + + return out diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py index ce60e8d..28af4e6 100644 --- a/sdp/challenger_sdp.py +++ b/sdp/challenger_sdp.py @@ -1,14 +1,66 @@ import numpy as np +import pyfftw + + +class SLIDING_DOT_PRODUCT: + # https://stackoverflow.com/a/30615425/2955541 + def __init__(self): + self.m = 0 + self.n = 0 + self.threads = 1 + self.rfft_Q_obj = None + self.rfft_T_obj = None + self.irfft_obj = None + + def __call__(self, Q, T): + if Q.shape[0] != self.m or T.shape[0] != self.n: + self.m = Q.shape[0] + self.n = T.shape[0] + self.shape = pyfftw.next_fast_len(self.n) + + self.rfft_input = pyfftw.empty_aligned(self.shape, dtype=np.float64) + self.rfft_obj = pyfftw.builders.rfft( + self.rfft_input, + overwrite_input=True, + avoid_copy=True, + n=self.shape, + threads=self.threads, + ) + + self.irfft_input = pyfftw.empty_aligned( + 1 + self.shape // 2, dtype=np.complex128 + ) + self.irfft_obj = pyfftw.builders.irfft( + self.irfft_input, + overwrite_input=True, + avoid_copy=True, + n=self.shape, + threads=self.threads, + ) + + self.rfft_input[: self.m] = Q[::-1] / self.shape # Reverse/flip Q and scale + self.rfft_input[self.m :] = 0.0 + self.rfft_obj.execute() + self.irfft_input[:] = self.rfft_obj.output_array + + self.rfft_input[: self.n] = T + self.rfft_input[self.n :] = 0.0 + self.rfft_obj.execute() + self.irfft_input *= self.rfft_obj.output_array + + # irfft_input is ready + self.irfft_obj.execute() + + return self.irfft_obj.output_array[self.m - 1 : self.n] + + +_sliding_dot_product = SLIDING_DOT_PRODUCT() def setup(Q, T): + _sliding_dot_product(Q, T) return def sliding_dot_product(Q, T): - m = len(Q) - l = T.shape[0] - m + 1 - out = np.empty(l) - for i in range(l): - out[i] = np.dot(Q, T[i : i + m]) - return out + return _sliding_dot_product(Q, T) diff --git a/test.py b/test.py index c72a6b3..81ad854 100644 --- a/test.py +++ b/test.py @@ -192,3 +192,37 @@ def test_setup(): raise e return + + +def test_challenger2_power2(): + # test for case 5. len(T) is power of 2 + pmin = 3 + pmax = 13 + + from sdp import challenger2_sdp + + challenger2_sdp._sliding_dot_product.create_reusable_objects( + pmin=pmin, pmax=pmax - 1 + ) + # leaving out pmax to force the algo to create rfft/irfft if not created before + + for q in range(pmin, pmax + 1): + n_Q = 2**q + for p in range(q, pmax + 1): + n_T = 2**p + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + + ref = naive_sliding_dot_product(Q, T) + comp = challenger2_sdp.sliding_dot_product(Q, T) + npt.assert_allclose(comp, ref) + + n_T = 2**16 + T = np.random.rand(n_T) + Q = np.random.rand(n_T - 1) + + ref = naive_sliding_dot_product(Q, T) + comp = challenger2_sdp.sliding_dot_product(Q, T) + npt.assert_allclose(comp, ref) + + return diff --git a/timing_OneShot.py b/timing_OneShot.py new file mode 100755 index 0000000..9f68e8d --- /dev/null +++ b/timing_OneShot.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +import argparse +import numpy as np +import time +import warnings + +import utils + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="./timing.py -noheader -pmin 6 -pmax 23 -pdiff 3 pyfftw challenger" + ) + parser.add_argument("-noheader", default=False, action="store_true") + parser.add_argument( + "-timeout", + default=5.0, + type=float, + help="Number of seconds to wait for a run before timing out", + ) + parser.add_argument( + "-pequal", default=False, action="store_true", help="Compute `len(Q) == len(T)`" + ) + parser.add_argument( + "-niter", default=4, type=int, help="Number of iterations to run" + ) + parser.add_argument("-pmin", default=6, type=int, help="Minimum 2^p to use") + parser.add_argument("-pmax", default=27, type=int, help="Maximum 2^p to use") + parser.add_argument( + "-pdiff", + default=100, + type=int, + help="Maximum deviation from the minimum 2^p allowed", + ) + parser.add_argument( + "-ignore", + default=None, + nargs="*", + help="Keyword of modules to match and ignore", + ) + parser.add_argument( + "include", + default=None, + nargs="*", + help="Keyword of modules to match and include", + ) + args = parser.parse_args() + + modules = utils.import_sdp_mods(args.include, args.ignore) + + noheader = args.noheader + timeout = args.timeout + if args.pequal: + skip_p_equal = 0 + else: + skip_p_equal = 1 + n_iter = args.niter + p_min = args.pmin + p_max = args.pmax + p_diff = args.pdiff + + if not noheader: + print("module,len_Q,len_T,n_iter,time", flush=True) + + start_timing = time.time() + for mod in modules: + mod_name = mod.__name__.removeprefix("sdp.").removesuffix("_sdp") + mod.setup(np.random.rand(2), np.random.rand(2)) + + for i in range(p_min, p_max + 1): + Q = np.random.rand(2**i) + + j_range = range(i + skip_p_equal, min(i + p_diff + 1, p_max + 1)) + timing = np.zeros(len(j_range), dtype=np.float64) + for _ in range(n_iter): + lst = [] + for j in j_range: + T = np.random.rand(2**j) + + start = time.time() + mod.sliding_dot_product(Q, T) + diff = time.time() - start + lst.append(diff) + + timing += np.array(lst) + + timing /= n_iter + + for j_index, j in enumerate(j_range): + T = np.random.rand(2**j) + info = f"{mod_name},{len(Q)},{len(T)},{n_iter}" + f",{timing[j_index]}" + print(info, flush=True) + + elapsed_timing = np.round((time.time() - start_timing) / 60.0, 2) + warnings.warn(f"Test completed in {elapsed_timing} min")