77
88import numpy as np
99import pandas as pd
10- from numba import jit
10+ from numba import jit , njit
1111import datetime
1212
13+ import warnings
14+
1315# look up attributes NAME, data type (Integer; Real; String) and data length by attribute number
1416attrinfo = {1 :('TSTYPE' ,'S' ,4 ), 2 :('STAID' ,'S' ,16 ), 11 :('DAREA' ,'R' ,1 ),
1517 17 :('TCODE' ,'I' ,1 ), 27 :('TSBYR' ,'I' ,1 ), 28 :('TSBMO' ,'I' ,1 ),
2426freq = {7 :'100YS' , 6 :'YS' , 5 :'MS' , 4 :'D' , 3 :'H' , 2 :'min' , 1 :'S' } # pandas date_range() frequency by TCODE, TGROUP
2527
2628
27- def readWDM (wdmfile , hdffile , jupyterlab = True ):
29+ def readWDM (wdmfile , hdffile , compress_output = False ):
2830 iarray = np .fromfile (wdmfile , dtype = np .int32 )
2931 farray = np .fromfile (wdmfile , dtype = np .float32 )
3032
33+ date_epoch = np .datetime64 (0 ,'Y' )
34+ dt_year = np .timedelta64 (1 , 'Y' )
35+ dt_month = np .timedelta64 (1 , 'M' )
36+ dt_day = np .timedelta64 (1 , 'D' )
37+ dt_hour = np .timedelta64 (1 , 'h' )
38+ dt_minute = np .timedelta64 (1 , 'm' )
39+ dt_second = np .timedelta64 (1 , 's' )
40+
3141 if iarray [0 ] != - 998 :
32- print ('Not a WDM file, magic number is not -990. Stopping!' )
33- return
42+ raise ValueError ('Provided file does not match WDM format. First int32 should be -998.' )
3443 nrecords = iarray [28 ] # first record is File Definition Record
3544 ntimeseries = iarray [31 ]
3645
@@ -39,7 +48,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
3948 if not (iarray [index ]== 0 and iarray [index + 1 ]== 0 and iarray [index + 2 ]== 0 and iarray [index + 3 ]== 0 ) and iarray [index + 5 ]== 1 :
4049 dsnlist .append (index )
4150 if len (dsnlist ) != ntimeseries :
42- print ( 'PROGRAM ERROR, wrong number of DSN records found' )
51+ raise RuntimeError ( f'Wrong number of Time Series Records found expecting: { ntimeseries } found: { len ( dsnlist ) } ' )
4352
4453 with pd .HDFStore (hdffile ) as store :
4554 summary = []
@@ -66,7 +75,7 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
6675 elif atype == 'R' :
6776 dattr [name ] = farray [ptr ]
6877 else :
69- dattr [name ] = '' .join ([itostr (iarray [k ]) for k in range (ptr , ptr + length // 4 )]).strip ()
78+ dattr [name ] = '' .join ([_inttostr (iarray [k ]) for k in range (ptr , ptr + length // 4 )]).strip ()
7079 if att not in dattr :
7180 found_in_all = False
7281 if found_in_all :
@@ -98,50 +107,49 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
98107 elif atype == 'R' :
99108 dattr [name ] = farray [ptr ]
100109 else :
101- dattr [name ] = '' .join ([itostr (iarray [k ]) for k in range (ptr , ptr + length // 4 )]).strip ()
110+ dattr [name ] = '' .join ([_inttostr (iarray [k ]) for k in range (ptr , ptr + length // 4 )]).strip ()
102111
103112 # Get timeseries timebase data
104- records = []
113+ records = []
114+ offsets = []
105115 for i in range (pdat + 1 , pdatv - 1 ):
106116 a = iarray [index + i ]
107117 if a != 0 :
108- records .append (splitposition (a ))
118+ record , offset = _splitposition (a )
119+ records .append (record )
120+ offsets .append (offset )
109121 if len (records ) == 0 :
110- continue # WDM preallocation, but nothing saved here yet
111-
112- srec , soffset = records [0 ]
113- start = splitdate (iarray [srec * 512 + soffset ])
114-
115- sprec , spoffset = splitposition (frepos )
116- finalindex = sprec * 512 + spoffset
122+ continue
117123
118124 # calculate number of data points in each group, tindex is final index for storage
119125 tgroup = dattr ['TGROUP' ]
120126 tstep = dattr ['TSSTEP' ]
121127 tcode = dattr ['TCODE' ]
122- cindex = pd .date_range (start = start , periods = len (records )+ 1 , freq = freq [tgroup ])
123- tindex = pd .date_range (start = start , end = cindex [- 1 ], freq = str (tstep ) + freq [tcode ])
124- counts = np .diff (np .searchsorted (tindex , cindex ))
125-
126- ## Get timeseries data
127- floats = np .zeros (sum (counts ), dtype = np .float32 )
128- findex = 0
129- for (rec ,offset ),count in zip (records , counts ):
130- findex = getfloats (iarray , farray , floats , findex , rec , offset , count , finalindex , tcode , tstep )
131-
132- ## Write to HDF5 file
133- series = pd .Series (floats [:findex ], index = tindex [:findex ])
128+
129+ records = np .asarray (records )
130+ offsets = np .asarray (offsets )
131+
132+ dates , values , stop_datetime = _process_groups (iarray , farray , records , offsets , tgroup )
133+ stop_datetime = datetime .datetime (* _bits_to_date (stop_datetime ))
134+ dates = np .array (dates )
135+ dates_converted = _date_convert (dates , date_epoch , dt_year , dt_month , dt_day , dt_hour , dt_minute , dt_second )
136+ series = pd .Series (values , index = dates_converted )
137+ try :
138+ series .index .freq = str (tstep ) + freq [tcode ]
139+ except ValueError :
140+ series .index .freq = None
141+
134142 dsname = f'TIMESERIES/TS{ dsn :03d} '
135- # series.to_hdf(store, dsname, complib='blosc', complevel=9)
136- if jupyterlab :
137- series .to_hdf (store , dsname , complib = 'blosc' , complevel = 9 ) # This is the official version
143+ if compress_output :
144+ series .to_hdf (store , dsname , complib = 'blosc' , complevel = 9 )
138145 else :
139- series .to_hdf (store , dsname , format = 't' , data_columns = True ) # show the columns in HDFView
146+ series .to_hdf (store , dsname , format = 't' , data_columns = True )
140147
141- data = [str (tindex [0 ]), str (tindex [- 1 ]), str (tstep ) + freq [tcode ],
142- len (series ), dattr ['TSTYPE' ], dattr ['TFILL' ]]
148+ data = [
149+ str (series .index [0 ]), str (stop_datetime ), str (tstep ) + freq [tcode ],
150+ len (series ), dattr ['TSTYPE' ], dattr ['TFILL' ]
151+ ]
143152 columns = ['Start' , 'Stop' , 'Freq' ,'Length' , 'TSTYPE' , 'TFILL' ]
144- # search = ['STAID', 'STNAM', 'SCENARIO', 'CONSTITUENT','LOCATION']
145153 for x in columns_to_add :
146154 if x in dattr :
147155 data .append (dattr [x ])
@@ -150,11 +158,165 @@ def readWDM(wdmfile, hdffile, jupyterlab=True):
150158 summary .append (data )
151159 summaryindx .append (dsname [11 :])
152160
153-
154161 dfsummary = pd .DataFrame (summary , index = summaryindx , columns = columns )
155162 store .put ('TIMESERIES/SUMMARY' ,dfsummary , format = 't' , data_columns = True )
163+
156164 return dfsummary
157165
166+ @njit
167+ def _splitdate (x ):
168+ year = np .int64 (x >> 14 )
169+ month = np .int64 (x >> 10 & 0xF )
170+ day = np .int64 (x >> 5 & 0x1F )
171+ hour = np .int64 (x & 0x1F )
172+ return _correct_date (year , month , day , hour , 0 ,0 )
173+
174+ @njit
175+ def _splitcontrol (x ):
176+ nval = x >> 16
177+ ltstep = x >> 10 & 0x3f
178+ ltcode = x >> 7 & 0x7
179+ comp = x >> 5 & 0x3
180+ qual = x & 0x1f
181+ return nval , ltstep , ltcode , comp , qual
182+
183+ @njit
184+ def _splitposition (x ):
185+ return ((x >> 9 ) - 1 , (x & 0x1FF ) - 1 ) #args: record, offset
186+
187+ @njit
188+ def _inttostr (i ):
189+ return chr (i & 0xFF ) + chr (i >> 8 & 0xFF ) + chr (i >> 16 & 0xFF ) + chr (i >> 24 & 0xFF )
190+
191+ @njit
192+ def _bits_to_date (x ):
193+ year = x >> 26
194+ month = x >> 22 & 0xf
195+ day = x >> 17 & 0x1f
196+ hour = x >> 12 & 0x1f
197+ minute = x >> 6 & 0x3f
198+ second = x & 0x3f
199+ return year , month , day , hour , minute , second
200+
201+ @njit
202+ def _date_to_bits (year , month , day , hour , minute , second ):
203+ x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second
204+ return x
205+
206+ @njit
207+ def _increment_date (date , timecode , timestep ):
208+ year , month , day , hour , minute , second = _bits_to_date (date )
209+
210+ if timecode == 7 : year += 100 * timestep
211+ elif timecode == 6 : year += timestep
212+ elif timecode == 5 : month += timestep
213+ elif timecode == 4 : day += timestep
214+ elif timecode == 3 : hour += timestep
215+ elif timecode == 2 : minute += timestep
216+ elif timecode == 1 : second += timestep
217+
218+ return _correct_date (year , month , day , hour , minute , second )
219+
220+ @njit
221+ def _correct_date (year , month , day , hour , minute , second ):
222+ while second >= 60 :
223+ second -= 60
224+ minute += 1
225+ while minute >= 60 :
226+ minute -= 60
227+ hour += 1
228+ while hour >= 24 :
229+ hour -= 24
230+ day += 1
231+ while day > _days_in_month (year , month ):
232+ day -= _days_in_month (year , month )
233+ month += 1
234+ while month > 12 :
235+ month -= 12
236+ year += 1
237+ return _date_to_bits (year , month , day , hour , minute , second )
238+
239+ @njit
240+ def _days_in_month (year , month ):
241+ if month > 12 : month %= 12
242+
243+ if month in (1 ,3 ,5 ,7 ,8 ,10 ,12 ):
244+ return 31
245+ elif month in (4 ,6 ,9 ,11 ):
246+ return 30
247+ elif month == 2 :
248+ if _is_leapyear (year ): return 29
249+ else : return 28
250+
251+ @njit
252+ def _is_leapyear (year ):
253+ if year % 400 == 0 :
254+ return True
255+ if year % 100 == 0 :
256+ return False
257+ if year % 4 == 0 :
258+ return True
259+ else :
260+ return False
261+
262+ @njit
263+ def _date_convert (dates , date_epoch , dt_year , dt_month , dt_day , dt_hour , dt_minute , dt_second ):
264+ converted_dates = []
265+ for x in dates :
266+ year , month , day , hour , minute , second = _bits_to_date (x )
267+ date = date_epoch
268+ date += (year - 1970 ) * dt_year
269+ date += (month - 1 ) * dt_month
270+ date += (day - 1 ) * dt_day
271+ date += hour * dt_hour
272+ date += minute * dt_minute
273+ date += second * dt_second
274+ converted_dates .append (date )
275+ return converted_dates
276+
277+ @njit
278+ def _process_groups (iarray , farray , records , offsets , tgroup ):
279+ date_array = [0 ] #need initialize with a type for numba
280+ value_array = [0.0 ]
281+
282+ for i in range (0 ,len (records )):
283+ record = records [i ]
284+ offset = offsets [i ]
285+ index = record * 512 + offset
286+ pscfwr = iarray [record * 512 + 3 ] #should be 0 for last record in timeseries
287+ current_date = _splitdate (iarray [index ])
288+ group_enddate = _increment_date (current_date , tgroup , 1 )
289+ offset += 1
290+ index += 1
291+
292+ while current_date < group_enddate :
293+ nval , ltstep , ltcode , comp , qual = _splitcontrol (iarray [index ])
294+ #compressed - only has single value which applies to full range
295+ if comp == 1 :
296+ for i in range (0 , nval , 1 ):
297+ date_array .append (current_date )
298+ current_date = _increment_date (current_date , ltcode , ltstep )
299+ value_array .append (farray [index + 1 ])
300+ index += 2
301+ offset += 2
302+ else :
303+ for i in range (0 , nval , 1 ):
304+ date_array .append (current_date )
305+ current_date = _increment_date (current_date , ltcode , ltstep )
306+ value_array .append (farray [index + 1 + i ])
307+ index += 1 + nval
308+ offset += 1 + nval
309+
310+ if offset >= 511 :
311+ offset = 4
312+ index = (pscfwr - 1 ) * 512 + offset
313+ record = pscfwr
314+ pscfwr = iarray [(record - 1 ) * 512 + 3 ] #should be 0 for last record in timeseries
315+
316+ date_array = date_array [1 :]
317+ value_array = value_array [1 :]
318+
319+ return date_array , value_array , group_enddate
158320
159321'''
160322Get single time series data from a WDM file
@@ -289,41 +451,39 @@ def get_wdm_data_set(wdmfile, attributes):
289451
290452 return None
291453
454+ ########################
455+ ### legacy functions ###
456+ ########################
292457
293458def todatetime (yr = 1900 , mo = 1 , dy = 1 , hr = 0 ):
294459 '''takes yr,mo,dy,hr information then returns its datetime64'''
460+ warnings .warn ("use '_date_convert' instead; Removed for numba compatible datetime handling; reference commit 1b52a1736e45a497ccdf78cd6e2eab8c0b7a444f" , DeprecationWarning )
295461 if hr == 24 :
296462 return datetime .datetime (yr , mo , dy , 23 ) + pd .Timedelta (1 ,'h' )
297463 else :
298464 return datetime .datetime (yr , mo , dy , hr )
299465
300466def splitdate (x ):
301467 '''splits WDM int32 DATWRD into year, month, day, hour -> then returns its datetime64'''
468+ warnings .warn ("use '_splitdate' instead; naming updated to indicate internal function" , DeprecationWarning )
302469 return todatetime (x >> 14 , x >> 10 & 0xF , x >> 5 & 0x1F , x & 0x1F ) # args: year, month, day, hour
303470
304471def splitcontrol (x ):
305472 ''' splits int32 into (qual, compcode, units, tstep, nvalues)'''
473+ warnings .warn ("use '_splitcontrol' instead; naming updated to indicate internal function" , DeprecationWarning )
306474 return (x & 0x1F , x >> 5 & 0x3 , x >> 7 & 0x7 , x >> 10 & 0x3F , x >> 16 )
307475
308476def splitposition (x ):
309477 ''' splits int32 into (record, offset), converting to Pyton zero based indexing'''
478+ warnings .warn ("use '_spiltposition' instead; naming updated to indicate internal function" , DeprecationWarning )
310479 return ((x >> 9 ) - 1 , (x & 0x1FF ) - 1 )
311480
312481def itostr (i ):
482+ warnings .warn ("use '_inttostr' instead; naming updated to indicate internal function; method also handles integer argments so updated name from 'i' to 'int' for additonal clarity" , DeprecationWarning )
313483 return chr (i & 0xFF ) + chr (i >> 8 & 0xFF ) + chr (i >> 16 & 0xFF ) + chr (i >> 24 & 0xFF )
314484
315- # @jit(nopython=True, cache=True)
316- def leap_year (y ):
317- if y % 400 == 0 :
318- return True
319- if y % 100 == 0 :
320- return False
321- if y % 4 == 0 :
322- return True
323- else :
324- return False
325-
326485def getfloats (iarray , farray , floats , findex , rec , offset , count , finalindex , tcode , tstep ):
486+ warnings .warn ("discontinue use and replace with new 'process_groups' instead; Function replaced by incompatible group/block processing approach; reference commit c5b2a1cdd6a55eccc0db67d7840ec3eaf904dcec ." ,DeprecationWarning )
327487 index = rec * 512 + offset + 1
328488 stop = (rec + 1 ) * 512
329489 cntr = 0
@@ -376,6 +536,7 @@ def getfloats(iarray, farray, floats, findex, rec, offset, count, finalindex, tc
376536 return findex
377537
378538def adjustNval (ldate , ltstep , tstep , ltcode , tcode , comp , nval ):
539+ warnings .warn ("supporting function for deprecated 'get_floats' function;" , DeprecationWarning )
379540 lnval = nval
380541 if comp != 1 :
381542 nval = - 1 # only can adjust compressed data
@@ -418,4 +579,3 @@ def adjustNval(ldate, ltstep, tstep, ltcode, tcode, comp, nval):
418579 str (nval ) + ',' , str (lnval ), ')' )
419580
420581 return nval
421-
0 commit comments