Skip to content

Commit 2d8854a

Browse files
Add new sdp functions (#1128)
* improve njit_sdp * simplify function * add new sdp func and its test * add pocketfft sdp and test * fixed missing imports * fixed coverage * refactored test functions * minor change * enhanced test cases * add minor comment * Minor change
1 parent bd6aed8 commit 2d8854a

File tree

2 files changed

+200
-30
lines changed

2 files changed

+200
-30
lines changed

stumpy/sdp.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
from numba import njit
3-
from scipy.signal import convolve
3+
from scipy.fft import next_fast_len
4+
from scipy.fft._pocketfft.basic import c2r, r2c
5+
from scipy.signal import convolve, oaconvolve
46

57
from . import config
68

@@ -23,11 +25,14 @@ def _njit_sliding_dot_product(Q, T):
2325
out : numpy.ndarray
2426
Sliding dot product between `Q` and `T`.
2527
"""
26-
m = Q.shape[0]
27-
l = T.shape[0] - m + 1
28+
m = len(Q)
29+
l = len(T) - m + 1
2830
out = np.empty(l)
2931
for i in range(l):
30-
out[i] = np.dot(Q, T[i : i + m])
32+
result = 0.0
33+
for j in range(m):
34+
result += Q[j] * T[i + j]
35+
out[i] = result
3136

3237
return out
3338

@@ -49,12 +54,59 @@ def _convolve_sliding_dot_product(Q, T):
4954
output : numpy.ndarray
5055
Sliding dot product between `Q` and `T`.
5156
"""
52-
n = T.shape[0]
53-
m = Q.shape[0]
54-
Qr = np.flipud(Q) # Reverse/flip Q
55-
QT = convolve(Qr, T)
57+
return convolve(np.flipud(Q), T, mode="valid")
5658

57-
return QT.real[m - 1 : n]
59+
60+
def _oaconvolve_sliding_dot_product(Q, T):
61+
"""
62+
Use scipy's oaconvolve to calculate the sliding dot product.
63+
64+
Parameters
65+
----------
66+
Q : numpy.ndarray
67+
Query array or subsequence
68+
69+
T : numpy.ndarray
70+
Time series or sequence
71+
72+
Returns
73+
-------
74+
output : numpy.ndarray
75+
Sliding dot product between `Q` and `T`.
76+
"""
77+
return oaconvolve(np.ascontiguousarray(Q[::-1]), T, mode="valid")
78+
79+
80+
def _pocketfft_sliding_dot_product(Q, T):
81+
"""
82+
Use scipy.fft._pocketfft to compute
83+
the sliding dot product.
84+
85+
Parameters
86+
----------
87+
Q : numpy.ndarray
88+
Query array or subsequence
89+
90+
T : numpy.ndarray
91+
Time series or sequence
92+
93+
Returns
94+
-------
95+
output : numpy.ndarray
96+
Sliding dot product between `Q` and `T`.
97+
"""
98+
n = len(T)
99+
m = len(Q)
100+
next_fast_n = next_fast_len(n, real=True)
101+
102+
tmp = np.empty((2, next_fast_n))
103+
tmp[0, :m] = Q[::-1]
104+
tmp[0, m:] = 0.0
105+
tmp[1, :n] = T
106+
tmp[1, n:] = 0.0
107+
fft_2d = r2c(True, tmp, axis=-1)
108+
109+
return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n]
58110

59111

60112
def _sliding_dot_product(Q, T):

tests/test_sdp.py

Lines changed: 139 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,154 @@
1+
import inspect
2+
import warnings
3+
from operator import eq, lt
4+
15
import naive
26
import numpy as np
37
import pytest
48
from numpy import testing as npt
59

610
from stumpy import sdp
711

8-
test_data = [
9-
(np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)),
12+
# README
13+
# Real FFT algorithm performs more efficiently when the length
14+
# of the input array `arr` is composed of small prime factors.
15+
# The next_fast_len(arr, real=True) function from Scipy returns
16+
# the same length if len(arr) is composed of a subset of
17+
# prime numbers 2, 3, 5. Therefore, these radices are
18+
# considered as the most efficient for the real FFT algorithm.
19+
20+
# To ensure that the tests cover different cases, the following cases
21+
# are considered:
22+
# 1. len(T) is even, and len(T) == next_fast_len(len(T), real=True)
23+
# 2. len(T) is odd, and len(T) == next_fast_len(len(T), real=True)
24+
# 3. len(T) is even, and len(T) < next_fast_len(len(T), real=True)
25+
# 4. len(T) is odd, and len(T) < next_fast_len(len(T), real=True)
26+
# And 5. a special case of 1, where len(T) is power of 2.
27+
28+
# Therefore:
29+
# 1. len(T) is composed of 2 and a subset of {3, 5}
30+
# 2. len(T) is composed of a subset of {3, 5}
31+
# 3. len(T) is composed of a subset of {7, 11, 13, ...} and 2
32+
# 4. len(T) is composed of a subset of {7, 11, 13, ...}
33+
# 5. len(T) is power of 2
34+
35+
# In some cases, the prime factors are raised to a power of
36+
# certain degree to increase the length of array to be around
37+
# 1000-2000. This allows us to test sliding_dot_product for
38+
# wider range of query lengths.
39+
40+
# test cases 1-4
41+
test_inputs = [
42+
# Input format:
43+
# (
44+
# len(T),
45+
# remainder, # from `len(T) % 2`
46+
# comparator, # for len(T) comparator next_fast_len(len(T), real=True)
47+
# )
48+
(
49+
2 * (3**2) * (5**3),
50+
0,
51+
eq,
52+
), # = 2250, Even `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`
53+
(
54+
(3**2) * (5**3),
55+
1,
56+
eq,
57+
), # = 1125, Odd `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`.
58+
(
59+
2 * 7 * 11 * 13,
60+
0,
61+
lt,
62+
), # = 2002, Even `len(T)`, and `len(T) < next_fast_len(len(T), real=True)`
1063
(
11-
np.array([9, 8100, -60], dtype=np.float64),
12-
np.array([584, -11, 23, 79, 1001], dtype=np.float64),
13-
),
14-
(np.random.uniform(-1000, 1000, [8]), np.random.uniform(-1000, 1000, [64])),
64+
7 * 11 * 13,
65+
1,
66+
lt,
67+
), # = 1001, Odd `len(T)`, and `len(T) < next_fast_len(len(T), real=True)`
1568
]
1669

1770

18-
@pytest.mark.parametrize("Q, T", test_data)
19-
def test_njit_sliding_dot_product(Q, T):
20-
ref_mp = naive.rolling_window_dot_product(Q, T)
21-
comp_mp = sdp._njit_sliding_dot_product(Q, T)
22-
npt.assert_almost_equal(ref_mp, comp_mp)
71+
def get_sdp_function_names():
72+
out = []
73+
for func_name, func in inspect.getmembers(sdp, inspect.isfunction):
74+
if func_name.endswith("sliding_dot_product"):
75+
out.append(func_name)
76+
77+
return out
78+
79+
80+
@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs)
81+
def test_sdp(n_T, remainder, comparator):
82+
# test_sdp for cases 1-4
83+
84+
n_Q_prime = [
85+
2,
86+
3,
87+
5,
88+
7,
89+
11,
90+
13,
91+
17,
92+
19,
93+
23,
94+
29,
95+
31,
96+
37,
97+
41,
98+
43,
99+
47,
100+
53,
101+
59,
102+
61,
103+
67,
104+
71,
105+
73,
106+
79,
107+
83,
108+
89,
109+
97,
110+
]
111+
n_Q_power2 = [2, 4, 8, 16, 32, 64]
112+
n_Q_values = n_Q_prime + n_Q_power2 + [n_T]
113+
n_Q_values = sorted(n_Q for n_Q in set(n_Q_values) if n_Q <= n_T)
114+
115+
for n_Q in n_Q_values:
116+
Q = np.random.rand(n_Q)
117+
T = np.random.rand(n_T)
118+
ref = naive.rolling_window_dot_product(Q, T)
119+
for func_name in get_sdp_function_names():
120+
func = getattr(sdp, func_name)
121+
try:
122+
comp = func(Q, T)
123+
npt.assert_allclose(comp, ref)
124+
except Exception as e: # pragma: no cover
125+
msg = f"Error in {func_name}, with n_Q={n_Q} and n_T={n_T}"
126+
warnings.warn(msg)
127+
raise e
128+
129+
130+
def test_sdp_power2():
131+
# test for case 5. len(T) is power of 2
132+
pmin = 3
133+
pmax = 13
23134

135+
for func_name in get_sdp_function_names():
136+
func = getattr(sdp, func_name)
137+
try:
138+
for q in range(pmin, pmax + 1):
139+
n_Q = 2**q
140+
for p in range(q, pmax + 1):
141+
n_T = 2**p
142+
Q = np.random.rand(n_Q)
143+
T = np.random.rand(n_T)
24144

25-
@pytest.mark.parametrize("Q, T", test_data)
26-
def test_convolve_sliding_dot_product(Q, T):
27-
ref_mp = naive.rolling_window_dot_product(Q, T)
28-
comp_mp = sdp._convolve_sliding_dot_product(Q, T)
29-
npt.assert_almost_equal(ref_mp, comp_mp)
145+
ref = naive.rolling_window_dot_product(Q, T)
146+
comp = func(Q, T)
147+
npt.assert_allclose(comp, ref)
30148

149+
except Exception as e: # pragma: no cover
150+
msg = f"Error in {func_name}, with q={q} and p={p}"
151+
warnings.warn(msg)
152+
raise e
31153

32-
@pytest.mark.parametrize("Q, T", test_data)
33-
def test_sliding_dot_product(Q, T):
34-
ref_mp = naive.rolling_window_dot_product(Q, T)
35-
comp_mp = sdp._sliding_dot_product(Q, T)
36-
npt.assert_almost_equal(ref_mp, comp_mp)
154+
return

0 commit comments

Comments
 (0)