Skip to content

Commit ba64128

Browse files
authored
Merge pull request #37 from LimnoTech/develop_readWDM2
Merge develop_readWDM into develop to read time series by block & group
2 parents 4b55545 + 12dc1d5 commit ba64128

20 files changed

+644580
-496
lines changed

HSP2/utilities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def transform(ts, name, how, siminfo):
7777
if freq == tsfreq:
7878
pass
7979
elif tsfreq == None: # Sparse time base, frequency not defined
80-
ts = ts.reindex(siminfo['tbase']).ffill().bfill()
80+
ts = ts.reindex(siminfo['tbase']).ffill().bfill()
8181
elif how == 'SAME':
8282
ts = ts.resample(freq).ffill() # tsfreq >= freq assumed, or bad user choice
8383
elif not how:
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"ExpandedNodes": [
3+
""
4+
],
5+
"SelectedNode": "\\readWDM.py",
6+
"PreviewInSolutionExplorer": false
7+
}

HSP2tools/readWDM.py

Lines changed: 208 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
import numpy as np
99
import pandas as pd
10-
from numba import jit
10+
from numba import jit, njit
1111
import datetime
1212

13+
import warnings
14+
1315
# look up attributes NAME, data type (Integer; Real; String) and data length by attribute number
1416
attrinfo = {1:('TSTYPE','S',4), 2:('STAID','S',16), 11:('DAREA','R',1),
1517
17:('TCODE','I',1), 27:('TSBYR','I',1), 28:('TSBMO','I',1),
@@ -24,13 +26,20 @@
2426
freq = {7:'100YS', 6:'YS', 5:'MS', 4:'D', 3:'H', 2:'min', 1:'S'} # pandas date_range() frequency by TCODE, TGROUP
2527

2628

27-
def readWDM(wdmfile, hdffile, jupyterlab=True):
29+
def readWDM(wdmfile, hdffile, compress_output=False):
2830
iarray = np.fromfile(wdmfile, dtype=np.int32)
2931
farray = np.fromfile(wdmfile, dtype=np.float32)
3032

33+
date_epoch = np.datetime64(0,'Y')
34+
dt_year = np.timedelta64(1, 'Y')
35+
dt_month = np.timedelta64(1, 'M')
36+
dt_day = np.timedelta64(1, 'D')
37+
dt_hour = np.timedelta64(1, 'h')
38+
dt_minute = np.timedelta64(1, 'm')
39+
dt_second = np.timedelta64(1, 's')
40+
3141
if iarray[0] != -998:
32-
print('Not a WDM file, magic number is not -990. Stopping!')
33-
return
42+
raise ValueError ('Provided file does not match WDM format. First int32 should be -998.')
3443
nrecords = iarray[28] # first record is File Definition Record
3544
ntimeseries = iarray[31]
3645

@@ -39,7 +48,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
3948
if not (iarray[index]==0 and iarray[index+1]==0 and iarray[index+2]==0 and iarray[index+3]==0) and iarray[index+5]==1:
4049
dsnlist.append(index)
4150
if len(dsnlist) != ntimeseries:
42-
print('PROGRAM ERROR, wrong number of DSN records found')
51+
raise RuntimeError (f'Wrong number of Time Series Records found expecting:{ntimeseries} found:{len(dsnlist)}')
4352

4453
with pd.HDFStore(hdffile) as store:
4554
summary = []
@@ -66,7 +75,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
6675
elif atype == 'R':
6776
dattr[name] = farray[ptr]
6877
else:
69-
dattr[name] = ''.join([itostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
78+
dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length // 4)]).strip()
7079
if att not in dattr:
7180
found_in_all = False
7281
if found_in_all:
@@ -98,50 +107,49 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
98107
elif atype == 'R':
99108
dattr[name] = farray[ptr]
100109
else:
101-
dattr[name] = ''.join([itostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
110+
dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
102111

103112
# Get timeseries timebase data
104-
records = []
113+
records = []
114+
offsets = []
105115
for i in range(pdat+1, pdatv-1):
106116
a = iarray[index+i]
107117
if a != 0:
108-
records.append(splitposition(a))
118+
record, offset = _splitposition(a)
119+
records.append(record)
120+
offsets.append(offset)
109121
if len(records) == 0:
110-
continue # WDM preallocation, but nothing saved here yet
111-
112-
srec, soffset = records[0]
113-
start = splitdate(iarray[srec*512 + soffset])
114-
115-
sprec, spoffset = splitposition(frepos)
116-
finalindex = sprec * 512 + spoffset
122+
continue
117123

118124
# calculate number of data points in each group, tindex is final index for storage
119125
tgroup = dattr['TGROUP']
120126
tstep = dattr['TSSTEP']
121127
tcode = dattr['TCODE']
122-
cindex = pd.date_range(start=start, periods=len(records)+1, freq=freq[tgroup])
123-
tindex = pd.date_range(start=start, end=cindex[-1], freq=str(tstep) + freq[tcode])
124-
counts = np.diff(np.searchsorted(tindex, cindex))
125-
126-
## Get timeseries data
127-
floats = np.zeros(sum(counts), dtype=np.float32)
128-
findex = 0
129-
for (rec,offset),count in zip(records, counts):
130-
findex = getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tcode, tstep)
131-
132-
## Write to HDF5 file
133-
series = pd.Series(floats[:findex], index=tindex[:findex])
128+
129+
records = np.asarray(records)
130+
offsets = np.asarray(offsets)
131+
132+
dates, values, stop_datetime = _process_groups(iarray, farray, records, offsets, tgroup)
133+
stop_datetime = datetime.datetime(*_bits_to_date(stop_datetime))
134+
dates = np.array(dates)
135+
dates_converted = _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second)
136+
series = pd.Series(values, index=dates_converted)
137+
try:
138+
series.index.freq = str(tstep) + freq[tcode]
139+
except ValueError:
140+
series.index.freq = None
141+
134142
dsname = f'TIMESERIES/TS{dsn:03d}'
135-
# series.to_hdf(store, dsname, complib='blosc', complevel=9)
136-
if jupyterlab:
137-
series.to_hdf(store, dsname, complib='blosc', complevel=9) # This is the official version
143+
if compress_output:
144+
series.to_hdf(store, dsname, complib='blosc', complevel=9)
138145
else:
139-
series.to_hdf(store, dsname, format='t', data_columns=True) # show the columns in HDFView
146+
series.to_hdf(store, dsname, format='t', data_columns=True)
140147

141-
data = [str(tindex[0]), str(tindex[-1]), str(tstep) + freq[tcode],
142-
len(series), dattr['TSTYPE'], dattr['TFILL']]
148+
data = [
149+
str(series.index[0]), str(stop_datetime), str(tstep) + freq[tcode],
150+
len(series), dattr['TSTYPE'], dattr['TFILL']
151+
]
143152
columns = ['Start', 'Stop', 'Freq','Length', 'TSTYPE', 'TFILL']
144-
# search = ['STAID', 'STNAM', 'SCENARIO', 'CONSTITUENT','LOCATION']
145153
for x in columns_to_add:
146154
if x in dattr:
147155
data.append(dattr[x])
@@ -150,11 +158,165 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
150158
summary.append(data)
151159
summaryindx.append(dsname[11:])
152160

153-
154161
dfsummary = pd.DataFrame(summary, index=summaryindx, columns=columns)
155162
store.put('TIMESERIES/SUMMARY',dfsummary, format='t', data_columns=True)
163+
156164
return dfsummary
157165

166+
@njit
167+
def _splitdate(x):
168+
year = np.int64(x >> 14)
169+
month = np.int64(x >> 10 & 0xF)
170+
day = np.int64(x >> 5 & 0x1F)
171+
hour = np.int64(x & 0x1F)
172+
return _correct_date(year, month, day, hour, 0,0)
173+
174+
@njit
175+
def _splitcontrol(x):
176+
nval = x >> 16
177+
ltstep = x >> 10 & 0x3f
178+
ltcode = x >> 7 & 0x7
179+
comp = x >> 5 & 0x3
180+
qual = x & 0x1f
181+
return nval, ltstep, ltcode, comp, qual
182+
183+
@njit
184+
def _splitposition(x):
185+
return((x>>9) - 1, (x&0x1FF) - 1) #args: record, offset
186+
187+
@njit
188+
def _inttostr(i):
189+
return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)
190+
191+
@njit
192+
def _bits_to_date(x):
193+
year = x >> 26
194+
month = x >> 22 & 0xf
195+
day = x >> 17 & 0x1f
196+
hour = x >> 12 & 0x1f
197+
minute = x >> 6 & 0x3f
198+
second = x & 0x3f
199+
return year, month, day, hour, minute, second
200+
201+
@njit
202+
def _date_to_bits(year, month, day, hour, minute, second):
203+
x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second
204+
return x
205+
206+
@njit
207+
def _increment_date(date, timecode, timestep):
208+
year, month, day, hour, minute, second = _bits_to_date(date)
209+
210+
if timecode == 7: year += 100 * timestep
211+
elif timecode == 6 : year += timestep
212+
elif timecode == 5 : month += timestep
213+
elif timecode == 4 : day += timestep
214+
elif timecode == 3 : hour += timestep
215+
elif timecode == 2 : minute += timestep
216+
elif timecode == 1 : second += timestep
217+
218+
return _correct_date(year, month, day, hour, minute, second)
219+
220+
@njit
221+
def _correct_date(year, month, day, hour, minute, second):
222+
while second >= 60:
223+
second -= 60
224+
minute += 1
225+
while minute >= 60:
226+
minute -= 60
227+
hour += 1
228+
while hour >= 24:
229+
hour -= 24
230+
day += 1
231+
while day > _days_in_month(year, month):
232+
day -= _days_in_month(year, month)
233+
month += 1
234+
while month > 12:
235+
month -= 12
236+
year += 1
237+
return _date_to_bits(year, month, day, hour, minute, second)
238+
239+
@njit
240+
def _days_in_month(year, month):
241+
if month > 12: month %= 12
242+
243+
if month in (1,3,5,7,8,10,12):
244+
return 31
245+
elif month in (4,6,9,11):
246+
return 30
247+
elif month == 2:
248+
if _is_leapyear(year): return 29
249+
else: return 28
250+
251+
@njit
252+
def _is_leapyear(year):
253+
if year % 400 == 0:
254+
return True
255+
if year % 100 == 0:
256+
return False
257+
if year % 4 == 0:
258+
return True
259+
else:
260+
return False
261+
262+
@njit
263+
def _date_convert(dates, date_epoch, dt_year, dt_month, dt_day, dt_hour, dt_minute, dt_second):
264+
converted_dates = []
265+
for x in dates:
266+
year, month, day, hour, minute, second = _bits_to_date(x)
267+
date = date_epoch
268+
date += (year - 1970) * dt_year
269+
date += (month - 1) * dt_month
270+
date += (day - 1) * dt_day
271+
date += hour * dt_hour
272+
date += minute * dt_minute
273+
date += second * dt_second
274+
converted_dates.append(date)
275+
return converted_dates
276+
277+
@njit
278+
def _process_groups(iarray, farray, records, offsets, tgroup):
279+
date_array = [0] #need initialize with a type for numba
280+
value_array = [0.0]
281+
282+
for i in range(0,len(records)):
283+
record = records[i]
284+
offset = offsets[i]
285+
index = record * 512 + offset
286+
pscfwr = iarray[record * 512 + 3] #should be 0 for last record in timeseries
287+
current_date = _splitdate(iarray[index])
288+
group_enddate = _increment_date(current_date, tgroup, 1)
289+
offset +=1
290+
index +=1
291+
292+
while current_date < group_enddate:
293+
nval, ltstep, ltcode, comp, qual = _splitcontrol(iarray[index])
294+
#compressed - only has single value which applies to full range
295+
if comp == 1:
296+
for i in range(0, nval, 1):
297+
date_array.append(current_date)
298+
current_date = _increment_date(current_date, ltcode, ltstep)
299+
value_array.append(farray[index + 1])
300+
index += 2
301+
offset +=2
302+
else:
303+
for i in range(0, nval, 1):
304+
date_array.append(current_date)
305+
current_date = _increment_date(current_date, ltcode, ltstep)
306+
value_array.append(farray[index + 1 + i])
307+
index += 1 + nval
308+
offset +=1 + nval
309+
310+
if offset >= 511:
311+
offset = 4
312+
index = (pscfwr - 1) * 512 + offset
313+
record = pscfwr
314+
pscfwr = iarray[(record - 1) * 512 + 3] #should be 0 for last record in timeseries
315+
316+
date_array = date_array[1:]
317+
value_array = value_array[1:]
318+
319+
return date_array, value_array, group_enddate
158320

159321
'''
160322
Get single time series data from a WDM file
@@ -289,41 +451,39 @@ def get_wdm_data_set(wdmfile, attributes):
289451

290452
return None
291453

454+
########################
455+
### legacy functions ###
456+
########################
292457

293458
def todatetime(yr=1900, mo=1, dy=1, hr=0):
294459
'''takes yr,mo,dy,hr information then returns its datetime64'''
460+
warnings.warn("use '_date_convert' instead; Removed for numba compatible datetime handling; reference commit 1b52a1736e45a497ccdf78cd6e2eab8c0b7a444f", DeprecationWarning)
295461
if hr == 24:
296462
return datetime.datetime(yr, mo, dy, 23) + pd.Timedelta(1,'h')
297463
else:
298464
return datetime.datetime(yr, mo, dy, hr)
299465

300466
def splitdate(x):
301467
'''splits WDM int32 DATWRD into year, month, day, hour -> then returns its datetime64'''
468+
warnings.warn("use '_splitdate' instead; naming updated to indicate internal function", DeprecationWarning)
302469
return todatetime(x >> 14, x >> 10 & 0xF, x >> 5 & 0x1F, x & 0x1F) # args: year, month, day, hour
303470

304471
def splitcontrol(x):
305472
''' splits int32 into (qual, compcode, units, tstep, nvalues)'''
473+
warnings.warn("use '_splitcontrol' instead; naming updated to indicate internal function", DeprecationWarning)
306474
return(x & 0x1F, x >> 5 & 0x3, x >> 7 & 0x7, x >> 10 & 0x3F, x >> 16)
307475

308476
def splitposition(x):
309477
''' splits int32 into (record, offset), converting to Pyton zero based indexing'''
478+
warnings.warn("use '_spiltposition' instead; naming updated to indicate internal function", DeprecationWarning)
310479
return((x>>9) - 1, (x&0x1FF) - 1)
311480

312481
def itostr(i):
482+
warnings.warn("use '_inttostr' instead; naming updated to indicate internal function; method also handles integer argments so updated name from 'i' to 'int' for additonal clarity", DeprecationWarning)
313483
return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)
314484

315-
# @jit(nopython=True, cache=True)
316-
def leap_year(y):
317-
if y % 400 == 0:
318-
return True
319-
if y % 100 == 0:
320-
return False
321-
if y % 4 == 0:
322-
return True
323-
else:
324-
return False
325-
326485
def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tcode, tstep):
486+
warnings.warn("discontinue use and replace with new 'process_groups' instead; Function replaced by incompatible group/block processing approach; reference commit c5b2a1cdd6a55eccc0db67d7840ec3eaf904dcec .",DeprecationWarning)
327487
index = rec * 512 + offset + 1
328488
stop = (rec + 1) * 512
329489
cntr = 0
@@ -376,6 +536,7 @@ def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tc
376536
return findex
377537

378538
def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
539+
warnings.warn("supporting function for deprecated 'get_floats' function;", DeprecationWarning)
379540
lnval = nval
380541
if comp != 1:
381542
nval = -1 # only can adjust compressed data
@@ -418,4 +579,3 @@ def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
418579
str(nval) + ',', str(lnval), ')')
419580

420581
return nval
421-

docs/HSPF_v12.2_manual+nav.docx

2 MB
Binary file not shown.

docs/HSPF_v12.2_manual+nav.pdf

5.62 MB
Binary file not shown.
4.32 MB
Binary file not shown.

docs/WDMProgrammersGuide.pdf

4.56 MB
Binary file not shown.

environment_dev.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ dependencies:
2020
# - texlive-core # to export notebooks to PDF. https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex
2121
- matplotlib
2222

23+
# Dev tools (optional)
24+
# - python-language-server
25+
- jupyter-lsp-python # Includes both the server extension (jupyter-lsp) and pyls third-party server (python-language-server)
26+
- jupyterlab-lsp # Docs at https://github.com/krassowski/jupyterlab-lsp
27+
2328
# package management
2429
- conda
2530
- conda-build

0 commit comments

Comments
 (0)