# rate_table_builder.py
import os
from dotenv import load_dotenv
import camelot
import sqlite3
import requests
def get_rate_sheet(
    remote_file: str, local_file: str, *, timeout: float = 30.0
) -> None:
    """Download the rate sheet PDF and save it to disk.

    Args:
        remote_file: URL of the rate sheet PDF.
        local_file: Path the downloaded bytes are written to. An existing
            file is overwritten.
        timeout: Seconds to wait on the server before giving up.
            Keyword-only, so existing two-argument calls are unaffected.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
        OSError: ``local_file`` could not be written.
    """
    response = requests.get(remote_file, timeout=timeout)
    # Fail here rather than saving an HTML error page under a .pdf name and
    # letting the PDF parser choke on it later.
    response.raise_for_status()
    with open(local_file, "wb") as f:
        f.write(response.content)
def process_rate_sheet(local_file: str) -> None:
    """Parse the downloaded rate sheet PDF and insert its rows into SQLite.

    Tables on page 2 onwards are read with Camelot's ``stream`` parser. A row
    is read as (country, destination codes, rate). Country and rate carry over
    from one row to the next, so a row holding only destination codes inherits
    the most recent values. Nothing is inserted until the first row for
    ``Afghanistan`` has been seen.

    The database path comes from ``DB_FILE`` (default ``db.sqlite``). The
    ``rates`` table is inserted into but not created here. All inserts are
    committed together at the end; on any failure the connection is closed
    without committing.

    Args:
        local_file: Path to the rate sheet PDF.

    Raises:
        SystemExit: The same destination code appears twice (exit status 1).
    """
    # TODO: Right now we handle one format of the document. Error-handling
    # should be added in case the format changes
    db_file = os.getenv("DB_FILE", "db.sqlite")
    tables = camelot.read_pdf(local_file, flavor="stream", pages="2-end")
    insert_sql = "INSERT INTO rates (pattern, country, rate) VALUES (?, ?, ?)"

    country = ""
    rate = ""
    start_record_found = False
    found_patterns = set()  # set, not list: O(1) duplicate check per pattern

    dbconn = sqlite3.connect(db_file)
    try:
        # NOTE(review): this function reached review with its indentation
        # stripped. The row checks below are written as siblings because the
        # 'Country' / 'Destination code' header guards are only reachable if
        # rows without a '$' are still processed -- confirm against the
        # repository copy.
        for table in tables:
            for _, row in table.df.iterrows():
                if len(row) == 3 and "$" in row[2]:
                    rate = float(row[2].replace("$", ""))
                if row[0] and row[0] != "Country":
                    country = row[0]
                if country == "Afghanistan":
                    start_record_found = True
                if not start_record_found:
                    continue

                for raw_pattern in row[1].split(", "):
                    if raw_pattern == "Destination code":
                        continue
                    pattern = raw_pattern.replace(",", "").replace(" ", "")
                    # Test for empty AFTER stripping separators, so a fragment
                    # made only of commas/spaces is not stored as ''.
                    if not pattern:
                        continue
                    if pattern in found_patterns:
                        print(f"Duplicate pattern {pattern} found. Unsure how to proceed.")
                        # Bare exit() is the interactive-shell helper and
                        # reports success (status 0); this is a failure.
                        raise SystemExit(1)
                    found_patterns.add(pattern)
                    print(country, pattern, rate)
                    dbconn.execute(insert_sql, (pattern, country, rate))
        dbconn.commit()
    finally:
        # Runs on success, on SystemExit and on any error. Closing without a
        # commit discards the pending inserts.
        dbconn.close()
def main():
    """Load ``.env``, download the rate sheet PDF, then load it into the database.

    ``RATE_SHEET_URL`` and ``LOCAL_RATE_SHEET`` override the download URL and
    the local PDF path; both fall back to built-in defaults.
    """
    load_dotenv()

    sheet_url = os.environ.get(
        "RATE_SHEET_URL",
        "https://www.webex.com/content/dam/wbx/us/documents/pdf/us-international-rates.pdf",
    )
    pdf_path = os.environ.get("LOCAL_RATE_SHEET", "us-international-rates.pdf")

    print("Downloading rate sheet")
    get_rate_sheet(sheet_url, pdf_path)
    print("Processing rate sheet")
    process_rate_sheet(pdf_path)
    print("Done")
# Script entry point: run the pipeline only when executed directly, not on import.
if __name__ == '__main__':
    main()