process_series_files.py
#!/usr/bin/env python
# Copyright 2013, 2014 Justis Grant Peters and Sagar Jauhari
# This file is part of BCIpy.
#
# BCIpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BCIpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with BCIpy. If not, see <http://www.gnu.org/licenses/>.
import sys, os
from os.path import isfile, join
import re
import dateutil.tz
import pandas as pd
import numpy as np
from datetime import datetime

# Create dict of machine data
def create_dict_machine_data(raw_dir):
    "Map the id field of each <datetime>.<id>.rawwave.csv file to its datetime prefix"
    onlyfiles_raw = [ f for f in os.listdir(raw_dir) if isfile(join(raw_dir,f)) ]
    pat_raw = re.compile(r"[0-9]*\.[a-z0-9]*\.rawwave\.csv")
    temp_dat_raw = [f.split('.')[0:2] for f in onlyfiles_raw if pat_raw.match(f)]
    mach_dict = {i[1]: i[0] for i in temp_dat_raw}
    return mach_dict
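
# Illustrative example (hypothetical filenames, not taken from any BCIpy data):
# a raw_dir containing "20140321123000.dev1a.rawwave.csv" would yield
# {'dev1a': '20140321123000'}; process_all_in_dir() below rebuilds each filename
# from these two pieces, and create_raw_incremental() slices the digit prefix as
# YYYYMMDD to recover the recording date.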

def create_raw_incremental(in_file, out_file, time_t, tzinfo=dateutil.tz.tzlocal()):
    "Create raw file with incremental milliseconds (interpolated sub-second timestamps)"
    raw = pd.read_csv(in_file, skipinitialspace=True, index_col=False) # avoid index to keep it from sorting
    day = time_t[0:4]+"-"+time_t[4:6]+"-"+time_t[6:8]
    #print day #debug

    # Incoming data has 512Hz samples with timestamps at resolution of one
    # second. For each second, convert the first timestamp to epoch time and
    # blank out the others so that we can do linear interpolation.
    # TODO estimate microseconds on first and last second, to avoid timestretch
    # TODO analyze clock skew, since some seconds have more or less samples
    # TODO consider a pandas.DatetimeIndex with just a start time and frequency
    prev_time = None
    for i,row in raw.iterrows():
        timestamp = row['%Time']
        if timestamp==prev_time:
            raw.set_value(i, '%Time', np.NaN)
        else:
            timestring = day + ' ' + timestamp + '.0'
            dt = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S.%f')\
                .replace(tzinfo=tzinfo) # set specified tz before conversion
            # time since UTC 1970-01-01 00:00:00, in seconds
            dt = float(dt.strftime('%s.%f'))
            raw.set_value(i, '%Time', dt)
        prev_time = timestamp

    # Anchor the final sample one second after the last whole-second timestamp
    # so the interpolation below has an endpoint (see the timestretch TODO above).
    timestring = day + ' ' + prev_time + '.0'
    dt = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S.%f')\
        .replace(tzinfo=tzinfo) # set specified tz before conversion
    # time since UTC 1970-01-01 00:00:00, in seconds
    dt = float(dt.strftime('%s.%f'))
    raw.set_value(i, '%Time', dt+1)

    # reindex with interpolated timestamps
    raw.index = pd.DatetimeIndex(
        pd.to_datetime(raw['%Time']\
            .convert_objects(convert_numeric=True)\
            .interpolate(), unit='s')
    ).tz_localize('UTC').tz_convert(tzinfo) # convert back to original tz

    raw.to_csv(out_file, index=True, cols=['Value'])
    return raw
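
# A minimal sketch (not called anywhere in this script) of the same
# interpolate-and-reindex step on current pandas releases, where
# DataFrame.set_value and Series.convert_objects no longer exist; it assumes
# '%Time' already holds the epoch-second anchors with NaN for repeated samples,
# exactly as prepared by the loop above.
def _interpolate_times_modern(raw, tzinfo=dateutil.tz.tzlocal()):
    # linearly fill the NaN gaps between the per-second anchors
    times = pd.to_numeric(raw['%Time'], errors='coerce').interpolate()
    # build a tz-aware index from the epoch seconds, then convert back
    raw.index = pd.DatetimeIndex(pd.to_datetime(times, unit='s'))\
        .tz_localize('UTC').tz_convert(tzinfo)
    return raw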

def process_all_in_dir(indir, outdir):
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    mach_dict = create_dict_machine_data(indir)
    for i in mach_dict:
        file_in = join(indir, mach_dict[i]+"."+i+".rawwave.csv")
        print "processing file %s" % file_in
        file_out = join(outdir, mach_dict[i]+"."+i+".rawwave_microsec.csv")
        create_raw_incremental(file_in, file_out, mach_dict[i])

if __name__ == '__main__':
    indir, outdir = sys.argv[1:3]
    process_all_in_dir(indir, outdir)
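
# Example invocation (directory names are placeholders):
#   python process_series_files.py raw_eeg/ raw_eeg_microsec/
# Each <datetime>.<id>.rawwave.csv found in the input directory is written to
# the output directory as <datetime>.<id>.rawwave_microsec.csv, containing the
# interpolated timestamp index and the Value column.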