Skip to content

Commit 2cb167c

Browse files
authored
Use BETWEEN queries in the API as much as possible (#977)
1 parent 27ce3f5 commit 2cb167c

File tree

5 files changed

+148
-9
lines changed

5 files changed

+148
-9
lines changed

src/server/_query.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from ._exceptions import DatabaseErrorException
2323
from ._validate import DateRange, extract_strings
2424
from ._params import GeoPair, SourceSignalPair, TimePair
25+
from .utils import dates_to_ranges
2526

2627

2728
def date_string(value: int) -> str:
@@ -89,7 +90,8 @@ def filter_dates(
8990
param_key: str,
9091
params: Dict[str, Any],
9192
):
92-
return filter_values(field, values, param_key, params, date_string)
93+
ranges = dates_to_ranges(values)
94+
return filter_values(field, ranges, param_key, params, date_string)
9395

9496

9597
def filter_fields(generator: Iterable[Dict[str, Any]]):
@@ -185,7 +187,8 @@ def filter_pair(pair: TimePair, i) -> str:
185187
params[type_param] = pair.time_type
186188
if isinstance(pair.time_values, bool) and pair.time_values:
187189
return f"{type_field} = :{type_param}"
188-
return f"({type_field} = :{type_param} AND {filter_integers(time_field, cast(Sequence[Union[int, Tuple[int,int]]], pair.time_values), type_param, params)})"
190+
ranges = dates_to_ranges(pair.time_values)
191+
return f"({type_field} = :{type_param} AND {filter_integers(time_field, cast(Sequence[Union[int, Tuple[int,int]]], ranges), type_param, params)})"
189192

190193
parts = [filter_pair(p, i) for i, p in enumerate(values)]
191194

src/server/utils/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day
1+
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day, dates_to_ranges

src/server/utils/dates.py

+102-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
from typing import Tuple
1+
from typing import (
2+
Optional,
3+
Sequence,
4+
Tuple,
5+
Union
6+
)
27
from datetime import date, timedelta
38
from epiweeks import Week, Year
49

@@ -23,6 +28,10 @@ def guess_time_value_is_day(value: int) -> bool:
2328
# YYYYMMDD type and not YYYYMM
2429
return len(str(value)) > 6
2530

31+
def guess_time_value_is_week(value: int) -> bool:
    """Guess whether *value* is a week (YYYYWW) rather than a day (YYYYMMDD).

    The guess is purely length based: a value counts as a week when its
    decimal representation has exactly six characters.
    """
    digits = str(value)
    return len(digits) == 6
34+
2635
def date_to_time_value(d: date) -> int:
    """Return *d* encoded as a YYYYMMDD integer, e.g. date(2020, 1, 2) -> 20200102."""
    formatted = d.strftime("%Y%m%d")
    return int(formatted)
2837

@@ -67,3 +76,95 @@ def weeks_in_range(week_range: Tuple[int, int]) -> int:
6776
year = Year(y)
6877
acc += year.totalweeks()
6978
return acc + 1 # same week should lead to 1 week that will be queried
79+
80+
def dates_to_ranges(values: Optional[Sequence[Union[Tuple[int, int], int]]]) -> Optional[Sequence[Union[Tuple[int, int], int]]]:
    """
    Converts a mixed list of dates and date ranges to an optimized list where dates are merged into ranges where possible.
    e.g. [20200101, 20200102, (20200101, 20200104), 20200106] -> [(20200101, 20200104), 20200106]
    (the first three values of the original list are merged into a single range)

    Whether the list holds days (YYYYMMDD) or weeks (YYYYWW) is guessed from the first element only;
    for a range, its start value is inspected.

    The input is returned unchanged when it is None or empty, has a single element, starts with
    something that is neither an int nor a tuple, looks like neither days nor weeks, or when the
    conversion raises an error (merging is an optimization only, so failures are not propagated).
    """
    if not values or len(values) <= 1:
        return values

    try:
        # determine whether the list is of days (YYYYMMDD) or weeks (YYYYWW) based on first element
        first = values[0]
        if isinstance(first, tuple):
            probe = first[0]
        elif isinstance(first, int):
            probe = first
        else:
            return values

        if guess_time_value_is_day(probe):
            return days_to_ranges(values)
        if guess_time_value_is_week(probe):
            return weeks_to_ranges(values)
        return values
    except Exception:
        # best effort: fall back to the unoptimized input on malformed values.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt/SystemExit propagate.
        return values
101+
102+
def days_to_ranges(values: Sequence[Union[Tuple[int, int], int]]) -> Sequence[Union[Tuple[int, int], int]]:
    """
    Merges day values (YYYYMMDD) and day ranges into a list ordered by start day, in which
    overlapping or directly adjacent entries are combined.
    A merged span covering a single day is emitted as an int, anything longer as a (start, end) tuple.
    """
    one_day = timedelta(days=1)

    # normalize every entry to a (start, end) pair of dates; a single day spans itself,
    # e.g. 20200101 -> (20200101, 20200101)
    spans = sorted(
        (
            (time_value_to_date(v), time_value_to_date(v))
            if isinstance(v, int)
            else (time_value_to_date(v[0]), time_value_to_date(v[1]))
            for v in values
        ),
        key=lambda span: span[0],
    )

    # classic interval merge over the spans sorted by start
    merged = []
    for start, end in spans:
        previous = merged[-1] if merged else None
        # shifting the start back by one day makes contiguous spans count as overlapping,
        # i.e. [1, 1], [2, 2] -> [1, 2]
        if previous is None or previous[1] < start - one_day:
            merged.append([start, end])
        elif end > previous[1]:
            previous[1] = end

    # convert the merged spans from dates back to integers
    return [
        date_to_time_value(first)
        if first == last
        else (date_to_time_value(first), date_to_time_value(last))
        for first, last in merged
    ]
136+
137+
def weeks_to_ranges(values: Sequence[Union[Tuple[int, int], int]]) -> Sequence[Union[Tuple[int, int], int]]:
    """
    Merges week values (YYYYWW) and week ranges into a list ordered by start week, in which
    overlapping or directly adjacent entries are combined.
    A merged span covering a single week is emitted as an int, anything longer as a (start, end) tuple.
    """
    # normalize every entry to a (start, end) pair of weeks; a single week spans itself,
    # e.g. 202001 -> (202001, 202001)
    spans = sorted(
        (
            (week_value_to_week(v), week_value_to_week(v))
            if isinstance(v, int)
            else (week_value_to_week(v[0]), week_value_to_week(v[1]))
            for v in values
        ),
        key=lambda span: span[0],
    )

    # classic interval merge over the spans sorted by start
    merged = []
    for start, end in spans:
        previous = merged[-1] if merged else None
        # shifting the start back by one week makes contiguous spans count as overlapping,
        # i.e. [1, 1], [2, 2] -> [1, 2]
        if previous is None or previous[1] < start - 1:
            merged.append([start, end])
        elif end > previous[1]:
            previous[1] = end

    # convert the merged spans from weeks back to integers
    return [
        week_to_time_value(first)
        if first == last
        else (week_to_time_value(first), week_to_time_value(last))
        for first, last in merged
    ]

tests/server/test_query.py

+25-4
Original file line numberDiff line numberDiff line change
@@ -89,23 +89,44 @@ def test_filter_dates(self):
8989
self.assertEqual(filter_dates("a", [20200101], "a", params), "(a = :a_0)")
9090
self.assertEqual(params, {"a_0": "2020-01-01"})
9191
params = {}
92+
self.assertEqual(filter_dates("a", [20200101, 20200101, (20200101, 20200101), 20200101], "a", params), "(a = :a_0)")
93+
self.assertEqual(params, {"a_0": "2020-01-01"})
94+
params = {}
9295
self.assertEqual(
9396
filter_dates("a", [20200101, 20200102], "a", params),
97+
"(a BETWEEN :a_0 AND :a_0_2)",
98+
)
99+
self.assertEqual(params, {"a_0": "2020-01-01", "a_0_2": "2020-01-02"})
100+
params = {}
101+
self.assertEqual(
102+
filter_dates("a", [20200101, 20200103], "a", params),
94103
"(a = :a_0 OR a = :a_1)",
95104
)
96-
self.assertEqual(params, {"a_0": "2020-01-01", "a_1": "2020-01-02"})
105+
self.assertEqual(params, {"a_0": "2020-01-01", "a_1": "2020-01-03"})
97106
params = {}
98107
self.assertEqual(
99108
filter_dates("a", [20200101, 20200102, (20200101, 20200104)], "a", params),
109+
"(a BETWEEN :a_0 AND :a_0_2)",
110+
)
111+
self.assertEqual(
112+
params,
113+
{
114+
"a_0": "2020-01-01",
115+
"a_0_2": "2020-01-04"
116+
},
117+
)
118+
params = {}
119+
self.assertEqual(
120+
filter_dates("a", [20200101, 20200103, (20200105, 20200107)], "a", params),
100121
"(a = :a_0 OR a = :a_1 OR a BETWEEN :a_2 AND :a_2_2)",
101122
)
102123
self.assertEqual(
103124
params,
104125
{
105126
"a_0": "2020-01-01",
106-
"a_1": "2020-01-02",
107-
"a_2": "2020-01-01",
108-
"a_2_2": "2020-01-04",
127+
"a_1": "2020-01-03",
128+
"a_2": "2020-01-05",
129+
"a_2_2": "2020-01-07",
109130
},
110131
)
111132

tests/server/utils/test_dates.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from datetime import date
33
from epiweeks import Week
44

5-
from delphi.epidata.server.utils.dates import time_value_to_date, date_to_time_value, shift_time_value, time_value_to_iso, days_in_range, weeks_in_range, week_to_time_value, week_value_to_week
5+
from delphi.epidata.server.utils.dates import time_value_to_date, date_to_time_value, shift_time_value, time_value_to_iso, days_in_range, weeks_in_range, week_to_time_value, week_value_to_week, dates_to_ranges
66

77

88
class UnitTests(unittest.TestCase):
@@ -40,3 +40,17 @@ def test_week_value_to_week(self):
4040
def test_week_to_time_value(self):
4141
self.assertEqual(week_to_time_value(Week(2021, 1)), 202101)
4242
self.assertEqual(week_to_time_value(Week(2020, 42)), 202042)
43+
44+
def test_dates_to_ranges(self):
    """dates_to_ranges passes trivial inputs through and merges overlapping/adjacent days and weeks."""
    # (input, expected) pairs, checked in order
    cases = [
        # nothing to merge
        (None, None),
        ([], []),
        # days
        ([20200101], [20200101]),
        ([(20200101, 20200105)], [(20200101, 20200105)]),
        ([20211231, (20211230, 20220102), 20220102], [(20211230, 20220102)]),
        (
            [20200101, 20200102, (20200101, 20200104), 20200106],
            [(20200101, 20200104), 20200106],
        ),
        # weeks
        ([202001], [202001]),
        ([(202001, 202005)], [(202001, 202005)]),
        ([202051, (202050, 202102), 202101], [(202050, 202102)]),
        (
            [202050, 202051, (202050, 202101), 202103],
            [(202050, 202101), 202103],
        ),
    ]
    for values, expected in cases:
        self.assertEqual(dates_to_ranges(values), expected)

0 commit comments

Comments
 (0)