50
50
import csv
51
51
import datetime
52
52
import glob
53
- import subprocess
54
- import random
55
53
from io import StringIO
54
+ import tempfile
56
55
57
56
# third party
58
57
import mysql .connector
64
63
from delphi .utils .epiweek import delta_epiweeks , check_epiweek
65
64
from delphi .utils .epidate import EpiDate
66
65
66
+
67
67
def ensure_tables_exist ():
68
- (u ,p ) = secrets .db .epi
69
- cnx = mysql .connector .connect (user = u ,password = p ,database = ' epidata' )
68
+ (u , p ) = secrets .db .epi
69
+ cnx = mysql .connector .connect (user = u , password = p , database = " epidata" )
70
70
try :
71
71
cursor = cnx .cursor ()
72
- cursor .execute ('''
72
+ cursor .execute (
73
+ """
73
74
CREATE TABLE IF NOT EXISTS `paho_dengue` (
74
75
`id` INT(11) NOT NULL PRIMARY KEY AUTO_INCREMENT,
75
76
`release_date` DATE NOT NULL,
@@ -85,35 +86,44 @@ def ensure_tables_exist():
85
86
`num_deaths` INT(11) NOT NULL,
86
87
UNIQUE KEY (`issue`, `epiweek`, `region`)
87
88
);
88
- ''' );
89
+ """
90
+ )
89
91
cnx .commit ()
90
92
finally :
91
93
cnx .close ()
92
94
95
+
93
96
def safe_float(f):
    """Parse a number that may contain thousands separators into a float.

    Commas are stripped before conversion, so ``"1,234.5"`` becomes
    ``1234.5``.

    Args:
        f: The raw value to parse, normally a string cell from a CSV row.

    Returns:
        The parsed float, or ``0`` when the value cannot be converted.
    """
    try:
        return float(f.replace(",", ""))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no ``replace`` (e.g. None or a number).
        # TypeError: ``replace`` rejected str arguments (e.g. bytes input).
        # ValueError: the cleaned text is not a valid float literal.
        # Deliberately narrower than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return 0
98
101
102
+
99
103
def safe_int(i):
    """Parse a number that may contain thousands separators into an int.

    Commas are stripped before conversion, so ``"1,234"`` becomes ``1234``.

    Args:
        i: The raw value to parse, normally a string cell from a CSV row.

    Returns:
        The parsed int, or ``0`` when the value cannot be converted
        (including text with a fractional part such as ``"1.5"``).
    """
    try:
        return int(i.replace(",", ""))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no ``replace`` (e.g. None or a number).
        # TypeError: ``replace`` rejected str arguments (e.g. bytes input).
        # ValueError: the cleaned text is not a valid integer literal.
        # Deliberately narrower than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return 0
104
108
105
def get_rows(cnx, table="paho_dengue"):
    """Count and return the number of rows in ``table``.

    Args:
        cnx: An open database connection exposing ``cursor()``.
        table: Name of the table to count. It is interpolated directly into
            the SQL text, so it must come from trusted code, never from
            user input.

    Returns:
        The row count reported by ``SELECT count(1)``.
    """
    select = cnx.cursor()
    try:
        # Identifiers cannot be bound as query parameters, hence the string
        # formatting; see the warning on ``table`` above.
        select.execute("SELECT count(1) num FROM %s" % table)
        # Iterate the cursor to the end, as the original loop did, so no
        # unread result is left pending when the cursor is closed.
        counts = [num for (num,) in select]
    finally:
        # Release the cursor even when the query fails.
        select.close()
    # An aggregate query yields a single row; take its only value.
    return counts[-1]
118
+
113
119
114
120
def get_paho_row (row ):
115
- if row [0 ] == "\ufeff Incidence Rate (c)" and row != "\ufeff Incidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000" .split ("," ):
116
- raise Exception ('PAHO header row has changed' )
121
+ if row [
122
+ 0
123
+ ] == "\ufeff Incidence Rate (c)" and row != "\ufeff Incidence Rate (c),(SD/D) x100 (e),CFR (f),ID,Country or Subregion,Deaths,EW,Confirmed,Epidemiological Week (a),Pop (no usar),Serotype,Severe Dengue (d),Total of Dengue Cases (b),Year,Population x 1000" .split (
124
+ ","
125
+ ):
126
+ raise Exception ("PAHO header row has changed" )
117
127
if len (row ) == 1 or row [0 ] == "Incidence Rate (c)" :
118
128
# this is a header row
119
129
return None
@@ -128,23 +138,26 @@ def get_paho_row(row):
128
138
except :
129
139
return None
130
140
try :
131
- check_epiweek (safe_int (row [13 ])* 100 + safe_int (row [8 ]), safe_int (row [13 ])* 100 + safe_int (row [6 ]))
141
+ check_epiweek (
142
+ safe_int (row [13 ]) * 100 + safe_int (row [8 ]), safe_int (row [13 ]) * 100 + safe_int (row [6 ])
143
+ )
132
144
except :
133
145
return None
134
146
return {
135
- ' issue' : safe_int (row [13 ])* 100 + safe_int (row [6 ]),
136
- ' epiweek' : safe_int (row [13 ])* 100 + safe_int (row [8 ]),
137
- ' region' : country ,
138
- ' total_pop' : safe_int (row [14 ]),
139
- ' serotype' : row [10 ],
140
- ' num_dengue' : safe_int (row [12 ]),
141
- ' incidence_rate' : safe_float (row [0 ]),
142
- ' num_severe' : safe_int (row [11 ]),
143
- ' num_deaths' : safe_int (row [5 ]),
144
- ' severe_ratio' : safe_float (row [1 ]),
145
- ' cfr' : safe_float (row [2 ])
147
+ " issue" : safe_int (row [13 ]) * 100 + safe_int (row [6 ]),
148
+ " epiweek" : safe_int (row [13 ]) * 100 + safe_int (row [8 ]),
149
+ " region" : country ,
150
+ " total_pop" : safe_int (row [14 ]),
151
+ " serotype" : row [10 ],
152
+ " num_dengue" : safe_int (row [12 ]),
153
+ " incidence_rate" : safe_float (row [0 ]),
154
+ " num_severe" : safe_int (row [11 ]),
155
+ " num_deaths" : safe_int (row [5 ]),
156
+ " severe_ratio" : safe_float (row [1 ]),
157
+ " cfr" : safe_float (row [2 ]),
146
158
}
147
159
160
+
148
161
def update_from_file (issue , date , filename , test_mode = False ):
149
162
# Read PAHO data from CSV and insert into (or update) the database.
150
163
@@ -156,23 +169,23 @@ def update_from_file(issue, date, filename, test_mode=False):
156
169
157
170
# database connection
158
171
u , p = secrets .db .epi
159
- cnx = mysql .connector .connect (user = u , password = p , database = ' epidata' )
160
- rows1 = get_rows (cnx , ' paho_dengue' )
161
- print (' rows before: %d' % (rows1 ))
172
+ cnx = mysql .connector .connect (user = u , password = p , database = " epidata" )
173
+ rows1 = get_rows (cnx , " paho_dengue" )
174
+ print (" rows before: %d" % (rows1 ))
162
175
insert = cnx .cursor ()
163
176
164
177
# load the data, ignoring empty rows
165
- print (' loading data from %s as issued on %d' % (filename , issue ))
166
- with open (filename ,'r' , encoding = ' utf-8' ) as f :
178
+ print (" loading data from %s as issued on %d" % (filename , issue ))
179
+ with open (filename , encoding = " utf-8" ) as f :
167
180
c = f .read ()
168
181
rows = []
169
- for l in csv .reader (StringIO (c ), delimiter = ',' ):
182
+ for l in csv .reader (StringIO (c ), delimiter = "," ):
170
183
rows .append (get_paho_row (l ))
171
- print (' loaded %d rows' % len (rows ))
184
+ print (" loaded %d rows" % len (rows ))
172
185
entries = [obj for obj in rows if obj ]
173
- print (' found %d entries' % len (entries ))
186
+ print (" found %d entries" % len (entries ))
174
187
175
- sql = '''
188
+ sql = """
176
189
INSERT INTO
177
190
`paho_dengue` (`release_date`, `issue`, `epiweek`, `region`, `lag`,
178
191
`total_pop`, `serotype`, `num_dengue`, `incidence_rate`,
@@ -187,55 +200,56 @@ def update_from_file(issue, date, filename, test_mode=False):
187
200
`incidence_rate` = %s,
188
201
`num_severe` = %s,
189
202
`num_deaths` = %s
190
- '''
203
+ """
191
204
192
205
for row in entries :
193
- if row [' issue' ] > issue : # Issued in a week that hasn't happened yet
206
+ if row [" issue" ] > issue : # Issued in a week that hasn't happened yet
194
207
continue
195
- lag = delta_epiweeks (row ['epiweek' ], issue )
196
- data_args = [row ['total_pop' ], row ['serotype' ], row ['num_dengue' ],
197
- row ['incidence_rate' ], row ['num_severe' ], row ['num_deaths' ]]
208
+ lag = delta_epiweeks (row ["epiweek" ], issue )
209
+ data_args = [
210
+ row ["total_pop" ],
211
+ row ["serotype" ],
212
+ row ["num_dengue" ],
213
+ row ["incidence_rate" ],
214
+ row ["num_severe" ],
215
+ row ["num_deaths" ],
216
+ ]
198
217
199
- insert_args = [date ,issue ,row [' epiweek' ], row [' region' ], lag ] + data_args
218
+ insert_args = [date , issue , row [" epiweek" ], row [" region" ], lag ] + data_args
200
219
update_args = [date ] + data_args
201
220
insert .execute (sql % tuple (insert_args + update_args ))
202
221
203
222
# cleanup
204
223
insert .close ()
205
224
if test_mode :
206
- print (' test mode, not committing' )
225
+ print (" test mode, not committing" )
207
226
rows2 = rows1
208
227
else :
209
228
cnx .commit ()
210
229
rows2 = get_rows (cnx )
211
- print (' rows after: %d (added %d)' % (rows2 ,rows2 - rows1 ))
230
+ print (" rows after: %d (added %d)" % (rows2 , rows2 - rows1 ))
212
231
cnx .close ()
213
232
233
+
214
234
def main ():
215
235
# args and usage
216
236
parser = argparse .ArgumentParser ()
217
237
parser .add_argument (
218
- '--test' ,
219
- action = 'store_true' ,
220
- help = 'do dry run only, do not update the database'
238
+ "--test" , action = "store_true" , help = "do dry run only, do not update the database"
221
239
)
222
240
parser .add_argument (
223
- '--file' ,
224
- type = str ,
225
- help = 'load an existing zip file (otherwise fetch current data)'
241
+ "--file" , type = str , help = "load an existing zip file (otherwise fetch current data)"
226
242
)
227
243
parser .add_argument (
228
- '--issue' ,
229
- type = int ,
230
- help = 'issue of the file (e.g. 201740); used iff --file is given'
244
+ "--issue" , type = int , help = "issue of the file (e.g. 201740); used iff --file is given"
231
245
)
232
246
args = parser .parse_args ()
233
247
234
248
if (args .file is None ) != (args .issue is None ):
235
- raise Exception (' --file and --issue must both be present or absent' )
249
+ raise Exception (" --file and --issue must both be present or absent" )
236
250
237
- date = datetime .datetime .now ().strftime (' %Y-%m-%d' )
238
- print (' assuming release date is today, %s' % date )
251
+ date = datetime .datetime .now ().strftime (" %Y-%m-%d" )
252
+ print (" assuming release date is today, %s" % date )
239
253
240
254
if args .file :
241
255
update_from_file (args .issue , date , args .file , test_mode = args .test )
@@ -274,7 +288,8 @@ def main():
274
288
if not db_error :
275
289
break # Exit loop with success
276
290
if flag >= max_tries :
277
- print ('WARNING: Database `paho_dengue` did not update successfully' )
291
+ print ("WARNING: Database `paho_dengue` did not update successfully" )
292
+
278
293
279
- if __name__ == ' __main__' :
294
+ if __name__ == " __main__" :
280
295
main ()
0 commit comments