Skip to content

Commit 1b52a17

Browse files
committed
numba compatible datetime converter
Even with the datetime conversions removed from the group processing loop, converting with datetime.datetime() remains slow. After trying several pandas-based datetime conversion approaches, I was still unable to achieve a significant performance boost. Numba does not support the creation of datetime objects; however, it does support datetime arithmetic. This commit adds a Numba-compatible datetime conversion function that calculates a date's offset from the epoch and adds the appropriate timedelta64 objects to produce a datetime64 object.
1 parent 1a62f38 commit 1b52a17

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

HSP2tools/readWDM.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ def readWDM(wdmfile, hdffile, compress_output=True):
3030
iarray = np.fromfile(wdmfile, dtype=np.int32)
3131
farray = np.fromfile(wdmfile, dtype=np.float32)
3232

33+
date_epoch = np.datetime64(0,'Y')
34+
dt_year = np.timedelta64(1, 'Y')
35+
dt_month = np.timedelta64(1, 'M')
36+
dt_day = np.timedelta64(1, 'D')
37+
dt_hour = np.timedelta64(1, 'h')
38+
dt_minute = np.timedelta64(1, 'm')
39+
dt_second = np.timedelta64(1, 's')
40+
3341
if iarray[0] != -998:
3442
raise ValueError ('Provided file does not match WDM format. First int32 should be -998.')
3543
nrecords = iarray[28] # first record is File Definition Record
@@ -124,6 +132,9 @@ def readWDM(wdmfile, hdffile, compress_output=True):
124132
series = pd.Series(values, index=dates)
125133
index = series.index.to_series()
126134
series.index = index.apply(lambda x: datetime.datetime(*bits_to_date(x)))
135+
dates = np.array(dates)
136+
dates_converted = date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second)
137+
series = pd.Series(values, index=dates_converted)
127138

128139
dsname = f'TIMESERIES/TS{dsn:03d}'
129140
if compress_output:
@@ -244,6 +255,21 @@ def _is_leapyear(year):
244255
else:
245256
return False
246257

258+
@njit
def date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second):
    """Convert packed date values into datetime values via epoch arithmetic.

    Each element of ``dates`` is decoded with ``bits_to_date`` into
    (year, month, day, hour, minute, second). The result is built by starting
    from ``date_epoch`` and adding each field's offset scaled by the matching
    unit step, so no datetime object has to be constructed inside the
    jitted code.

    Parameters
    ----------
    dates : iterable of packed date values accepted by ``bits_to_date``.
    date_epoch : starting datetime; the year offset is computed relative to
        1970, so this is expected to represent year 1970.
    dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second : the step added
        per unit of the corresponding field (presumably one-unit timedelta64
        values supplied by the caller -- confirm at the call site).

    Returns
    -------
    list
        One converted value per input element, in input order.
    """
    results = []
    for packed in dates:
        yr, mon, dy, hr, mnt, sec = bits_to_date(packed)

        # Accumulate from the coarsest unit to the finest, keeping the same
        # order of additions so each intermediate sum sees the same operands.
        stamp = date_epoch + (yr - 1970) * dt_year
        # Month and day fields are 1-based, hence the "- 1" offsets.
        stamp = stamp + (mon - 1) * dt_month
        stamp = stamp + (dy - 1) * dt_day
        stamp = stamp + hr * dt_hour
        stamp = stamp + mnt * dt_minute
        stamp = stamp + sec * dt_second

        results.append(stamp)
    return results
272+
247273
@njit
248274
def _process_groups(iarray, farray, records, offsets, tgroup):
249275
date_array = [0] #need initialize with a type for numba

0 commit comments

Comments
 (0)