Skip to content

Commit 0609700

Browse files
authored
Merge pull request #1189 from cmu-delphi/ds/format100
style(black): format acquisition with `black`, line-length=100
2 parents 7fd6a90 + f93f020 commit 0609700

33 files changed

+4051
-3502
lines changed

.editorconfig

+22
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
# EditorConfig helps developers define and maintain consistent
2+
# coding styles between different editors and IDEs
3+
# editorconfig.org
4+
5+
root = true
6+
7+
[*]
8+
# We recommend you to keep these unchanged
9+
end_of_line = lf
10+
charset = utf-8
11+
trim_trailing_whitespace = true
12+
insert_final_newline = true
13+
14+
15+
[*.py]
16+
# Change these settings to your own preference
17+
indent_style = space
18+
indent_size = 4
19+
20+
21+
[*.md]
22+
trim_trailing_whitespace = false

.git-blame-ignore-revs

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
# style(black): format cdc acquisition
2+
980b0b7e80c7923b79e14fee620645e680785703
3+
# style(black): format covidcast_nowcast acquisition
4+
9e6ff16f599e8feec34a08dd1bddbc5eae347b55
5+
# style(black): format ecdc acquisition
6+
d1141d904da4e62992b97c92d5caebd8fadffd42
7+
# style(black): format flusurv acquisition
8+
08af0f6b7bff85bbc2b193b63b5abf6a16ba03e4
9+
# style(black): format fluview acquisition
10+
0133ef2042c4df8867e91595eb1f64873edb4632
11+
# style(black): format ght acquisition
12+
b8900a0bc846888885310911efd6e26459effa99
13+
# style(black): format kcdc acquisition
14+
a849384c884934b3b7c3c67b68aa6240277d6b6d
15+
# style(black): format nidss acquisition
16+
d04af3c02fda7708a16bec0952b1aa7475acaec7
17+
# style(black): format paho acquisition
18+
7f60fbba572c1b6e5153a9ef216895bdc2f7f5b3
19+
# style(black): format quidel acquisition
20+
b9ceb400d9248c8271e8342275664ac5524e335d
21+
# style(black): format twitter acquisition
22+
07ed83e5768f717ab0f9a62a9209e4e2cffa058d
23+
# style(black): format wiki acquisition
24+
923852eafa86b8f8b182d499489249ba8f815843

pyproject.toml

+23-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,26 @@
1-
21
[tool.black]
3-
line-length = 200
2+
line-length = 100
43
target-version = ['py38']
54
include = 'server,tests/server'
5+
6+
[tool.pylint]
7+
[tool.pylint.'MESSAGES CONTROL']
8+
max-line-length = 100
9+
disable = [
10+
'logging-format-interpolation',
11+
# Allow pytest functions to be part of a class
12+
'no-self-use',
13+
'too-many-locals',
14+
'too-many-arguments',
15+
# Allow pytest classes to have one test
16+
'too-few-public-methods',
17+
]
18+
19+
[tool.pylint.'BASIC']
20+
# Allow arbitrarily short-named variables.
21+
variable-rgx = ['[a-z_][a-z0-9_]*']
22+
argument-rgx = [ '[a-z_][a-z0-9_]*' ]
23+
attr-rgx = ['[a-z_][a-z0-9_]*']
24+
25+
[tool.pylint.'DESIGN']
26+
ignored-argument-names = ['(_.*|run_as_module)']

src/acquisition/cdcp/cdc_dropbox_receiver.py

+106-106
Original file line number | Diff line number | Diff line change
@@ -29,128 +29,128 @@
2929

3030

3131
# location constants
32-
DROPBOX_BASE_DIR = '/cdc_page_stats'
33-
DELPHI_BASE_DIR = '/common/cdc_stage'
32+
DROPBOX_BASE_DIR = "/cdc_page_stats"
33+
DELPHI_BASE_DIR = "/common/cdc_stage"
3434

3535

3636
def get_timestamp_string():
37-
"""
38-
Return the current local date and time as a string.
37+
"""
38+
Return the current local date and time as a string.
3939
40-
The format is "%Y%m%d_%H%M%S".
41-
"""
42-
return datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
40+
The format is "%Y%m%d_%H%M%S".
41+
"""
42+
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
4343

4444

4545
def trigger_further_processing():
46-
"""Add CDCP processing scripts to the Automation run queue."""
46+
"""Add CDCP processing scripts to the Automation run queue."""
4747

48-
# connect
49-
u, p = secrets.db.auto
50-
cnx = mysql.connector.connect(user=u, password=p, database='automation')
51-
cur = cnx.cursor()
48+
# connect
49+
u, p = secrets.db.auto
50+
cnx = mysql.connector.connect(user=u, password=p, database="automation")
51+
cur = cnx.cursor()
5252

53-
# add step "Process CDCP Data" to queue
54-
cur.execute('CALL automation.RunStep(46)')
53+
# add step "Process CDCP Data" to queue
54+
cur.execute("CALL automation.RunStep(46)")
5555

56-
# disconnect
57-
cur.close()
58-
cnx.commit()
59-
cnx.close()
56+
# disconnect
57+
cur.close()
58+
cnx.commit()
59+
cnx.close()
6060

6161

6262
def fetch_data():
63-
"""
64-
Check for new files on dropbox, download them, zip them, cleanup dropbox, and
65-
trigger further processing of new data.
66-
"""
67-
68-
# initialize dropbox api
69-
dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token)
70-
71-
# look for new CDC data files
72-
print('checking dropbox:%s' % DROPBOX_BASE_DIR)
73-
save_list = []
74-
for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries:
75-
name = entry.name
76-
if name.endswith('.csv') or name.endswith('.zip'):
77-
print(' download "%s"' % name)
78-
save_list.append(name)
79-
else:
80-
print(' skip "%s"' % name)
81-
82-
# determine if there's anything to be done
83-
if len(save_list) == 0:
84-
print('did not find any new data files')
85-
return
86-
87-
# download new files, saving them inside of a new zip file
88-
timestamp = get_timestamp_string()
89-
zip_path = '%s/dropbox_%s.zip' % (DELPHI_BASE_DIR, timestamp)
90-
print('downloading into delphi:%s' % zip_path)
91-
with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zf:
63+
"""
64+
Check for new files on dropbox, download them, zip them, cleanup dropbox, and
65+
trigger further processing of new data.
66+
"""
67+
68+
# initialize dropbox api
69+
dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token)
70+
71+
# look for new CDC data files
72+
print(f"checking dropbox: {DROPBOX_BASE_DIR}")
73+
save_list = []
74+
for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries:
75+
name = entry.name
76+
if name.endswith(".csv") or name.endswith(".zip"):
77+
print(f" download: {name}")
78+
save_list.append(name)
79+
else:
80+
print(f" skip: {name}")
81+
82+
# determine if there's anything to be done
83+
if len(save_list) == 0:
84+
print("did not find any new data files")
85+
return
86+
87+
# download new files, saving them inside of a new zip file
88+
timestamp = get_timestamp_string()
89+
zip_path = f"{DELPHI_BASE_DIR}/dropbox_{timestamp}.zip"
90+
print(f"downloading into delphi:{zip_path}")
91+
with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf:
92+
for name in save_list:
93+
# location of the file on dropbox
94+
dropbox_path = f"{DROPBOX_BASE_DIR}/{name}"
95+
print(f" {dropbox_path}")
96+
97+
# start the download
98+
meta, resp = dbx.files_download(dropbox_path)
99+
100+
# check status and length
101+
if resp.status_code != 200:
102+
raise Exception(["resp.status_code", resp.status_code])
103+
dropbox_len = meta.size
104+
print(f" need {int(dropbox_len)} bytes...")
105+
content_len = int(resp.headers.get("Content-Length", -1))
106+
if dropbox_len != content_len:
107+
info = ["dropbox_len", dropbox_len, "content_len", content_len]
108+
raise Exception(info)
109+
110+
# finish the download, holding the data in this variable
111+
filedata = resp.content
112+
113+
# check the length again
114+
payload_len = len(filedata)
115+
print(" downloaded")
116+
if dropbox_len != payload_len:
117+
info = ["dropbox_len", dropbox_len, "payload_len", payload_len]
118+
raise Exception(info)
119+
120+
# add the downloaded file to the zip file
121+
zf.writestr(name, filedata)
122+
print(" added")
123+
124+
# At this point, all the data is stored and awaiting further processing on
125+
# the delphi server.
126+
print(f"saved all new data in {zip_path}")
127+
128+
# on dropbox, archive downloaded files so they won't be downloaded again
129+
archive_dir = f"archived_reports/processed_{timestamp}"
130+
print("archiving files...")
92131
for name in save_list:
93-
# location of the file on dropbox
94-
dropbox_path = '%s/%s' % (DROPBOX_BASE_DIR, name)
95-
print(' %s' % dropbox_path)
96-
97-
# start the download
98-
meta, resp = dbx.files_download(dropbox_path)
99-
100-
# check status and length
101-
if resp.status_code != 200:
102-
raise Exception(['resp.status_code', resp.status_code])
103-
dropbox_len = meta.size
104-
print(' need %d bytes...' % dropbox_len)
105-
content_len = int(resp.headers.get('Content-Length', -1))
106-
if dropbox_len != content_len:
107-
info = ['dropbox_len', dropbox_len, 'content_len', content_len]
108-
raise Exception(info)
109-
110-
# finish the download, holding the data in this variable
111-
filedata = resp.content
112-
113-
# check the length again
114-
payload_len = len(filedata)
115-
print(' downloaded')
116-
if dropbox_len != payload_len:
117-
info = ['dropbox_len', dropbox_len, 'payload_len', payload_len]
118-
raise Exception(info)
119-
120-
# add the downloaded file to the zip file
121-
zf.writestr(name, filedata)
122-
print(' added')
123-
124-
# At this point, all the data is stored and awaiting further processing on
125-
# the delphi server.
126-
print('saved all new data in %s' % zip_path)
127-
128-
# on dropbox, archive downloaded files so they won't be downloaded again
129-
archive_dir = 'archived_reports/processed_%s' % timestamp
130-
print('archiving files...')
131-
for name in save_list:
132-
# source and destination
133-
dropbox_src = '%s/%s' % (DROPBOX_BASE_DIR, name)
134-
dropbox_dst = '%s/%s/%s' % (DROPBOX_BASE_DIR, archive_dir, name)
135-
print(' "%s" -> "%s"' % (dropbox_src, dropbox_dst))
136-
137-
# move the file
138-
meta = dbx.files_move(dropbox_src, dropbox_dst)
139-
140-
# sanity check
141-
if archive_dir not in meta.path_lower:
142-
raise Exception('failed to move "%s"' % name)
143-
144-
# finally, trigger the usual processing flow
145-
print('triggering processing flow')
146-
trigger_further_processing()
147-
print('done')
132+
# source and destination
133+
dropbox_src = f"{DROPBOX_BASE_DIR}/{name}"
134+
dropbox_dst = f"{DROPBOX_BASE_DIR}/{archive_dir}/{name}"
135+
print(f" {dropbox_src} -> {dropbox_dst}")
136+
137+
# move the file
138+
meta = dbx.files_move(dropbox_src, dropbox_dst)
139+
140+
# sanity check
141+
if archive_dir not in meta.path_lower:
142+
raise Exception(f"failed to move {name}")
143+
144+
# finally, trigger the usual processing flow
145+
print("triggering processing flow")
146+
trigger_further_processing()
147+
print("done")
148148

149149

150150
def main():
151-
# fetch new data
152-
fetch_data()
151+
# fetch new data
152+
fetch_data()
153153

154154

155-
if __name__ == '__main__':
156-
main()
155+
if __name__ == "__main__":
156+
main()

0 commit comments

Comments
 (0)