Skip to content

Commit 1526dc9

Browse files
minhkhul and nmdefries
authored
Doctor_visits patching code (#1977)
* add patching code * add documentation * linter * fix dir name * Update get_latest_claims_name.py * remove patch var, use only issue" * issue -> issue_date for clarity * fix logger * unit test * fix unit test * lint * Update doctor_visits/delphi_doctor_visits/patch.py Co-authored-by: nmdefries <[email protected]> * add download and get_latest_claims_name tests * Update test_get_latest_claims_name.py test file name --------- Co-authored-by: nmdefries <[email protected]>
1 parent 84d0597 commit 1526dc9

File tree

8 files changed

+176
-16
lines changed

8 files changed

+176
-16
lines changed

doctor_visits/README.md

+6
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,9 @@ The output will show the number of unit tests that passed and failed, along
5353
with the percentage of code covered by the tests. None of the tests should
5454
fail and the code lines that are not covered by unit tests should be small and
5555
should not include critical sub-routines.
56+
57+
## Running Patches:
58+
To generate data for a specific range of issue dates, written out in batch issue format, set the `patch` section of `params.json` as described in the `patch.py` module docstring, then run:
59+
```
60+
env/bin/python -m delphi_doctor_visits.patch
61+
```

doctor_visits/delphi_doctor_visits/download_claims_ftp_files.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,13 @@ def change_date_format(name):
5151
name = '_'.join(split_name)
5252
return name
5353

54-
def download(ftp_credentials, out_path, logger):
54+
def download(ftp_credentials, out_path, logger, issue_date=None):
5555
"""Pull the latest raw files."""
56-
current_time = datetime.datetime.now()
56+
if not issue_date:
57+
current_time = datetime.datetime.now()
58+
else:
59+
current_time = datetime.datetime.strptime(issue_date, "%Y-%m-%d").replace(hour=23, minute=59, second=59)
60+
5761
logger.info("starting download", time=current_time)
5862
seconds_in_day = 24 * 60 * 60
5963

doctor_visits/delphi_doctor_visits/get_latest_claims_name.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@
55
import datetime
66
from pathlib import Path
77

8-
def get_latest_filename(dir_path, logger):
8+
def get_latest_filename(dir_path, logger, issue_date=None):
99
"""Get the latest filename from the list of downloaded raw files."""
10-
current_date = datetime.datetime.now()
10+
if issue_date:
11+
current_date = datetime.datetime.strptime(issue_date, "%Y-%m-%d").replace(hour=23, minute=59, second=59)
12+
else:
13+
current_date = datetime.datetime.now()
1114
files = list(Path(dir_path).glob("*"))
1215

1316
latest_timestamp = datetime.datetime(1900, 1, 1)
@@ -24,7 +27,7 @@ def get_latest_filename(dir_path, logger):
2427
latest_timestamp = timestamp
2528
latest_filename = file
2629

27-
assert current_date.date() == latest_timestamp.date(), "no drop for today"
30+
assert current_date.date() == latest_timestamp.date(), f"no drop for {current_date}"
2831

2932
logger.info("Latest claims file", filename=latest_filename)
3033

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
This module is used for patching data in the delphi_doctor_visits package.
3+
4+
To use this module, you need to specify the range of issue dates in params.json, like so:
5+
6+
{
7+
"common": {
8+
...
9+
},
10+
"validation": {
11+
...
12+
},
13+
"patch": {
14+
"patch_dir": "/Users/minhkhuele/Desktop/delphi/covidcast-indicators/doctor_visits/AprilPatch",
15+
"start_issue": "2024-04-20",
16+
"end_issue": "2024-04-21"
17+
}
18+
}
19+
20+
It will generate data for that range of issue dates, and store them in batch issue format:
21+
[name-of-patch]/issue_[issue-date]/doctor-visits/actual_data_file.csv
22+
"""
23+
24+
from datetime import datetime, timedelta
25+
from os import makedirs
26+
27+
from delphi_utils import get_structured_logger, read_params
28+
29+
from .run import run_module
30+
31+
32+
def patch():
33+
"""
34+
Run the doctor visits indicator for a range of issue dates.
35+
36+
The range of issue dates is specified in params.json using the following keys:
37+
- "patch": Only used for patching data
38+
- "start_issue": str, YYYY-MM-DD format, first issue date
39+
- "end_issue": str, YYYY-MM-DD format, last issue date
40+
- "patch_dir": str, directory to write all issues output
41+
"""
42+
params = read_params()
43+
logger = get_structured_logger("delphi_doctor_visits.patch", filename=params["common"]["log_filename"])
44+
45+
start_issue = datetime.strptime(params["patch"]["start_issue"], "%Y-%m-%d")
46+
end_issue = datetime.strptime(params["patch"]["end_issue"], "%Y-%m-%d")
47+
48+
logger.info(f"""Start patching {params["patch"]["patch_dir"]}""")
49+
logger.info(f"""Start issue: {start_issue.strftime("%Y-%m-%d")}""")
50+
logger.info(f"""End issue: {end_issue.strftime("%Y-%m-%d")}""")
51+
52+
makedirs(params["patch"]["patch_dir"], exist_ok=True)
53+
54+
current_issue = start_issue
55+
56+
while current_issue <= end_issue:
57+
logger.info(f"""Running issue {current_issue.strftime("%Y-%m-%d")}""")
58+
59+
params["patch"]["current_issue"] = current_issue.strftime("%Y-%m-%d")
60+
61+
current_issue_yyyymmdd = current_issue.strftime("%Y%m%d")
62+
current_issue_dir = f"""{params["patch"]["patch_dir"]}/issue_{current_issue_yyyymmdd}/doctor-visits"""
63+
makedirs(f"{current_issue_dir}", exist_ok=True)
64+
params["common"]["export_dir"] = f"""{current_issue_dir}"""
65+
66+
run_module(params, logger)
67+
current_issue += timedelta(days=1)
68+
69+
70+
if __name__ == "__main__":
71+
patch()

doctor_visits/delphi_doctor_visits/run.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from .get_latest_claims_name import get_latest_filename
2121

2222

23-
def run_module(params): # pylint: disable=too-many-statements
23+
def run_module(params, logger=None): # pylint: disable=too-many-statements
2424
"""
2525
Run doctor visits indicator.
2626
@@ -42,18 +42,26 @@ def run_module(params): # pylint: disable=too-many-statements
4242
- "se": bool, whether to write out standard errors
4343
- "obfuscated_prefix": str, prefix for signal name if write_se is True.
4444
- "parallel": bool, whether to update sensor in parallel.
45+
- "patch": Only used for patching data, remove if not patching.
46+
Check out patch.py and README for more details on how to run patches.
47+
- "start_issue": str, YYYY-MM-DD format, first issue date
48+
- "end_issue": str, YYYY-MM-DD format, last issue date
49+
- "patch_dir": str, directory to write all issues output
4550
"""
4651
start_time = time.time()
47-
logger = get_structured_logger(
48-
__name__, filename=params["common"].get("log_filename"),
49-
log_exceptions=params["common"].get("log_exceptions", True))
52+
issue_date = params.get("patch", {}).get("current_issue", None)
53+
if not logger:
54+
logger = get_structured_logger(
55+
__name__,
56+
filename=params["common"].get("log_filename"),
57+
log_exceptions=params["common"].get("log_exceptions", True),
58+
)
5059

5160
# pull latest data
52-
download(params["indicator"]["ftp_credentials"],
53-
params["indicator"]["input_dir"], logger)
61+
download(params["indicator"]["ftp_credentials"], params["indicator"]["input_dir"], logger, issue_date=issue_date)
5462

5563
# find the latest files (these have timestamps)
56-
claims_file = get_latest_filename(params["indicator"]["input_dir"], logger)
64+
claims_file = get_latest_filename(params["indicator"]["input_dir"], logger, issue_date=issue_date)
5765

5866
# modify data
5967
modify_and_write(claims_file, logger)

doctor_visits/tests/test_download.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import unittest
2+
from unittest.mock import patch, MagicMock
3+
from delphi_doctor_visits.download_claims_ftp_files import download
4+
5+
class TestDownload(unittest.TestCase):
6+
@patch('delphi_doctor_visits.download_claims_ftp_files.paramiko.SSHClient')
7+
@patch('delphi_doctor_visits.download_claims_ftp_files.path.exists', return_value=False)
8+
def test_download(self, mock_exists, mock_sshclient):
9+
mock_sshclient_instance = MagicMock()
10+
mock_sshclient.return_value = mock_sshclient_instance
11+
mock_sftp = MagicMock()
12+
mock_sshclient_instance.open_sftp.return_value = mock_sftp
13+
mock_sftp.listdir_attr.return_value = [MagicMock(filename="SYNEDI_AGG_OUTPATIENT_20200207_1455CDT.csv.gz")]
14+
ftp_credentials = {"host": "test_host", "user": "test_user", "pass": "test_pass", "port": "test_port"}
15+
out_path = "./test_data/"
16+
logger = MagicMock()
17+
18+
# case 1: download with issue_date that does not exist on ftp server
19+
download(ftp_credentials, out_path, logger, issue_date="2020-02-08")
20+
mock_sshclient_instance.connect.assert_called_once_with(ftp_credentials["host"], username=ftp_credentials["user"], password=ftp_credentials["pass"], port=ftp_credentials["port"])
21+
mock_sftp.get.assert_not_called()
22+
23+
# case 2: download with issue_date that exists on ftp server
24+
download(ftp_credentials, out_path, logger, issue_date="2020-02-07")
25+
mock_sftp.get.assert_called()
26+
27+
if __name__ == '__main__':
28+
unittest.main()

doctor_visits/tests/test_get_latest_claims_name.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,12 @@
1111

1212
class TestGetLatestFileName:
1313
logger = Mock()
14-
14+
dir_path = "test_data"
15+
1516
def test_get_latest_claims_name(self):
16-
dir_path = "./test_data/"
17-
1817
with pytest.raises(AssertionError):
19-
get_latest_filename(dir_path, self.logger)
18+
get_latest_filename(self.dir_path, self.logger)
19+
20+
def test_get_latest_claims_name_with_issue_date(self):
21+
result = get_latest_filename(self.dir_path, self.logger, issue_date="2020-02-07")
22+
assert str(result) == f"{self.dir_path}/SYNEDI_AGG_OUTPATIENT_07022020_1455CDT.csv.gz"

doctor_visits/tests/test_patch.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import unittest
2+
from unittest.mock import patch as mock_patch, call
3+
from delphi_doctor_visits.patch import patch
4+
import os
5+
import shutil
6+
7+
class TestPatchModule(unittest.TestCase):
8+
def test_patch(self):
9+
with mock_patch('delphi_doctor_visits.patch.run_module') as mock_run_module, \
10+
mock_patch('delphi_doctor_visits.patch.get_structured_logger') as mock_get_structured_logger, \
11+
mock_patch('delphi_doctor_visits.patch.read_params') as mock_read_params:
12+
13+
mock_read_params.return_value = {
14+
"common": {
15+
"log_filename": "test.log"
16+
},
17+
"patch": {
18+
"start_issue": "2021-01-01",
19+
"end_issue": "2021-01-02",
20+
"patch_dir": "./patch_dir"
21+
}
22+
}
23+
24+
patch()
25+
26+
self.assertIn('current_issue', mock_read_params.return_value['patch'])
27+
self.assertEqual(mock_read_params.return_value['patch']['current_issue'], '2021-01-02')
28+
29+
self.assertTrue(os.path.isdir('./patch_dir'))
30+
self.assertTrue(os.path.isdir('./patch_dir/issue_20210101/doctor-visits'))
31+
self.assertTrue(os.path.isdir('./patch_dir/issue_20210102/doctor-visits'))
32+
33+
# Clean up the created directories after the test
34+
shutil.rmtree(mock_read_params.return_value["patch"]["patch_dir"])
35+
36+
if __name__ == '__main__':
37+
unittest.main()

0 commit comments

Comments
 (0)