Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions sdp/challenger2_sdp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import numpy as np
import pyfftw


class SLIDING_DOT_PRODUCT:
# https://stackoverflow.com/a/30615425/2955541
def __init__(self):
self.m = 0
self.n = 0
self.shape = 0
self.threads = 1

self.rfft_obj = None
self.irfft_obj = None

self.store_obj = False
self.historical_obj = {}

self.real_arr = pyfftw.empty_aligned(2**20, dtype=np.float64)
self.complex_arr = pyfftw.empty_aligned(2**20, dtype=np.complex128)

def __call__(self, Q, T):
if T.shape[0] == self.n:
shape = self.shape
else:
shape = pyfftw.next_fast_len(T.shape[0])

self.n = T.shape[0]
self.m = Q.shape[0]

if self.shape != shape:
# need to set rfft/irfft objects
self.shape = shape
rfft_irfft_objects = self.historical_obj.get(self.shape, None)

if rfft_irfft_objects is not None:
self.rfft_obj = rfft_irfft_objects[0]
self.irfft_obj = rfft_irfft_objects[1]
else:
self.rfft_obj = pyfftw.builders.rfft(
pyfftw.empty_aligned(self.shape, dtype=np.float64),
n=self.shape,
overwrite_input=True,
avoid_copy=True,
threads=self.threads,
)

self.irfft_obj = pyfftw.builders.irfft(
pyfftw.empty_aligned(1 + self.shape // 2, dtype=np.complex128),
overwrite_input=True,
avoid_copy=True,
n=self.shape,
threads=self.threads,
)

if self.store_obj: # pragma: no cover
self.historical_obj[self.shape] = (self.rfft_obj, self.irfft_obj)

rfft_shape = 1 + self.shape // 2
if self.shape <= self.real_arr.shape[0]:
real_arr = self.real_arr[: self.shape]
complex_arr = self.complex_arr[:rfft_shape]
else: # pragma: no cover
real_arr = pyfftw.empty_aligned(self.shape, dtype=np.float64)
complex_arr = pyfftw.empty_aligned(rfft_shape, dtype=np.complex128)

real_arr[: self.n] = T
real_arr[self.n :] = 0
self.rfft_obj(real_arr)
complex_arr[:] = self.rfft_obj.output_array

real_arr[: self.m] = Q[::-1]
real_arr[self.m :] = 0
self.rfft_obj(real_arr)
complex_arr *= self.rfft_obj.output_array

self.irfft_obj(complex_arr)

out = self.irfft_obj.output_array[self.m - 1 : self.n]

return out

def create_reusable_objects(self, pmin=2, pmax=20):
for p in range(pmin, pmax + 1):
n = 2**p
rfft_obj = pyfftw.builders.rfft(
pyfftw.empty_aligned(n, dtype=np.float64),
overwrite_input=True,
avoid_copy=True,
n=n,
threads=self.threads,
)
irfft_obj = pyfftw.builders.irfft(
pyfftw.empty_aligned(1 + n // 2, dtype=np.complex128),
overwrite_input=True,
avoid_copy=True,
n=n,
threads=self.threads,
)
self.historical_obj[n] = (rfft_obj, irfft_obj)

return


_sliding_dot_product = SLIDING_DOT_PRODUCT()


def setup(Q, T):
_sliding_dot_product.create_reusable_objects()
_sliding_dot_product(Q, T)
return


def sliding_dot_product(Q, T):
if len(Q) == len(T):
out = np.dot(Q, T)
else:
out = _sliding_dot_product(Q, T)

return out
64 changes: 58 additions & 6 deletions sdp/challenger_sdp.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,66 @@
import numpy as np
import pyfftw


class SLIDING_DOT_PRODUCT:
# https://stackoverflow.com/a/30615425/2955541
def __init__(self):
self.m = 0
self.n = 0
self.threads = 1
self.rfft_Q_obj = None
self.rfft_T_obj = None
self.irfft_obj = None

def __call__(self, Q, T):
if Q.shape[0] != self.m or T.shape[0] != self.n:
self.m = Q.shape[0]
self.n = T.shape[0]
self.shape = pyfftw.next_fast_len(self.n)

self.rfft_input = pyfftw.empty_aligned(self.shape, dtype=np.float64)
self.rfft_obj = pyfftw.builders.rfft(
self.rfft_input,
overwrite_input=True,
avoid_copy=True,
n=self.shape,
threads=self.threads,
)

self.irfft_input = pyfftw.empty_aligned(
1 + self.shape // 2, dtype=np.complex128
)
self.irfft_obj = pyfftw.builders.irfft(
self.irfft_input,
overwrite_input=True,
avoid_copy=True,
n=self.shape,
threads=self.threads,
)

self.rfft_input[: self.m] = Q[::-1] / self.shape # Reverse/flip Q and scale
self.rfft_input[self.m :] = 0.0
self.rfft_obj.execute()
self.irfft_input[:] = self.rfft_obj.output_array

self.rfft_input[: self.n] = T
self.rfft_input[self.n :] = 0.0
self.rfft_obj.execute()
self.irfft_input *= self.rfft_obj.output_array

# irfft_input is ready
self.irfft_obj.execute()

return self.irfft_obj.output_array[self.m - 1 : self.n]


_sliding_dot_product = SLIDING_DOT_PRODUCT()


def setup(Q, T):
_sliding_dot_product(Q, T)
return


def sliding_dot_product(Q, T):
m = len(Q)
l = T.shape[0] - m + 1
out = np.empty(l)
for i in range(l):
out[i] = np.dot(Q, T[i : i + m])
return out
return _sliding_dot_product(Q, T)
34 changes: 34 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,37 @@ def test_setup():
raise e

return


def test_challenger2_power2():
# test for case 5. len(T) is power of 2
pmin = 3
pmax = 13

from sdp import challenger2_sdp

challenger2_sdp._sliding_dot_product.create_reusable_objects(
pmin=pmin, pmax=pmax - 1
)
# leaving out pmax to force the algo to create rfft/irfft if not created before

for q in range(pmin, pmax + 1):
n_Q = 2**q
for p in range(q, pmax + 1):
n_T = 2**p
Q = np.random.rand(n_Q)
T = np.random.rand(n_T)

ref = naive_sliding_dot_product(Q, T)
comp = challenger2_sdp.sliding_dot_product(Q, T)
npt.assert_allclose(comp, ref)

n_T = 2**16
T = np.random.rand(n_T)
Q = np.random.rand(n_T - 1)

ref = naive_sliding_dot_product(Q, T)
comp = challenger2_sdp.sliding_dot_product(Q, T)
npt.assert_allclose(comp, ref)

return
95 changes: 95 additions & 0 deletions timing_OneShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python

import argparse
import numpy as np
import time
import warnings

import utils

if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="./timing.py -noheader -pmin 6 -pmax 23 -pdiff 3 pyfftw challenger"
)
parser.add_argument("-noheader", default=False, action="store_true")
parser.add_argument(
"-timeout",
default=5.0,
type=float,
help="Number of seconds to wait for a run before timing out",
)
parser.add_argument(
"-pequal", default=False, action="store_true", help="Compute `len(Q) == len(T)`"
)
parser.add_argument(
"-niter", default=4, type=int, help="Number of iterations to run"
)
parser.add_argument("-pmin", default=6, type=int, help="Minimum 2^p to use")
parser.add_argument("-pmax", default=27, type=int, help="Maximum 2^p to use")
parser.add_argument(
"-pdiff",
default=100,
type=int,
help="Maximum deviation from the minimum 2^p allowed",
)
parser.add_argument(
"-ignore",
default=None,
nargs="*",
help="Keyword of modules to match and ignore",
)
parser.add_argument(
"include",
default=None,
nargs="*",
help="Keyword of modules to match and include",
)
args = parser.parse_args()

modules = utils.import_sdp_mods(args.include, args.ignore)

noheader = args.noheader
timeout = args.timeout
if args.pequal:
skip_p_equal = 0
else:
skip_p_equal = 1
n_iter = args.niter
p_min = args.pmin
p_max = args.pmax
p_diff = args.pdiff

if not noheader:
print("module,len_Q,len_T,n_iter,time", flush=True)

start_timing = time.time()
for mod in modules:
mod_name = mod.__name__.removeprefix("sdp.").removesuffix("_sdp")
mod.setup(np.random.rand(2), np.random.rand(2))

for i in range(p_min, p_max + 1):
Q = np.random.rand(2**i)

j_range = range(i + skip_p_equal, min(i + p_diff + 1, p_max + 1))
timing = np.zeros(len(j_range), dtype=np.float64)
for _ in range(n_iter):
lst = []
for j in j_range:
T = np.random.rand(2**j)

start = time.time()
mod.sliding_dot_product(Q, T)
diff = time.time() - start
lst.append(diff)

timing += np.array(lst)

timing /= n_iter

for j_index, j in enumerate(j_range):
T = np.random.rand(2**j)
info = f"{mod_name},{len(Q)},{len(T)},{n_iter}" + f",{timing[j_index]}"
print(info, flush=True)

elapsed_timing = np.round((time.time() - start_timing) / 60.0, 2)
warnings.warn(f"Test completed in {elapsed_timing} min")
Loading