@@ -28,28 +28,29 @@ def sig_digit_round(value, n_digits):
28
28
sign_mask = value < 0
29
29
value [sign_mask ] *= - 1
30
30
exponent = np .ceil (np .log10 (value ))
31
- result = 10 ** exponent * np .round (value * 10 ** (- exponent ), n_digits )
31
+ result = 10 ** exponent * np .round (value * 10 ** (- exponent ), n_digits )
32
32
result [sign_mask ] *= - 1
33
33
result [zero_mask ] = in_value [zero_mask ]
34
34
return result
35
35
36
36
37
def convert_df_type(df, type_dict, logger):
    """Convert types and warn if there are unexpected columns.

    Args:
        df: DataFrame whose columns are cast.
        type_dict: Mapping of column name to dtype, handed to ``df.astype``.
        logger: Object whose ``info`` method accepts a message plus keyword
            context (it is called with ``new_columns=...``).

    Returns:
        The DataFrame produced by ``df.astype(type_dict)``.

    Raises:
        KeyError: If ``df.astype`` raises ``KeyError``; re-raised with a
            message listing the expected and the received column names,
            chained to the original exception.
    """
    try:
        df = df.astype(type_dict)
    except KeyError as exc:
        separator = "\n "
        # Stringify labels before sorting/joining: non-string column labels
        # (e.g. default integer labels) would otherwise raise TypeError here
        # and hide the schema error we are trying to report.
        expected = separator.join(sorted(map(str, type_dict)))
        received = separator.join(sorted(map(str, df.columns)))
        raise KeyError(
            f"""
Expected column(s) missed, The dataset schema may
have changed. Please investigate and amend the code.

expected={expected}

received={received}
"""
        ) from exc

    # Columns present in the data but absent from the schema are only logged.
    new_columns = set(df.columns).difference(type_dict)
    if new_columns:
        logger.info("New columns found in NWSS dataset.", new_columns=new_columns)
    return df
55
56
@@ -125,15 +126,15 @@ def pull_nwss_data(token: str, logger):
125
126
"""
126
127
# Pull data from Socrata API
127
128
client = Socrata ("data.cdc.gov" , token )
128
- results_concentration = client .get ("g653-rqe2" , limit = 10 ** 10 )
129
- results_metric = client .get ("2ew6-ywp6" , limit = 10 ** 10 )
129
+ results_concentration = client .get ("g653-rqe2" , limit = 10 ** 10 )
130
+ results_metric = client .get ("2ew6-ywp6" , limit = 10 ** 10 )
130
131
df_metric = pd .DataFrame .from_records (results_metric )
131
132
df_concentration = pd .DataFrame .from_records (results_concentration )
132
133
df_concentration = df_concentration .rename (columns = {"date" : "timestamp" })
133
134
134
135
# Schema checks.
135
- df_concentration = convert_df_type (df_concentration , logger )
136
- df_metric = convert_df_type (df_metric , logger )
136
+ df_concentration = convert_df_type (df_concentration , TYPE_DICT , logger )
137
+ df_metric = convert_df_type (df_metric , TYPE_DICT_METRIC , logger )
137
138
138
139
# Drop sites without a normalization scheme.
139
140
df = df_concentration [~ df_concentration ["normalization" ].isna ()]
0 commit comments