Skip to content

Commit 50e40ba

Browse files
committed
differently hacky but more consistent way to cast variable types
1 parent 32e1a42 commit 50e40ba

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

src/acquisition/covid_hosp/common/database.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,25 @@ def insert_dataset(self, publication_date, dataframe):
172172
tuple(values) +
173173
tuple(i[0] for i in self.additional_fields))
174174

175+
# handle columns that are never or seldom updated and are used like a foreign-key table (if this database needs it)
175176
if hasattr(self, 'AGGREGATE_KEY_COLS'):
176177
ak_cols = self.AGGREGATE_KEY_COLS
177178

178179
# restrict data to just the key columns and remove duplicate rows
179180
ak_data = dataframe[ak_cols].drop_duplicates()
180-
# cast types (bools and NULLables)
181+
# cast types
182+
dataframe_typemap = {
183+
name: dtype
184+
for name, _, dtype in dataframe_columns_and_types
185+
}
181186
for col in ak_cols:
182-
if col.startswith('is_'): # TODO: this is hacky af
183-
ak_data[col] = (ak_data[col] == 'true')
187+
def cast_but_sidestep_nans(i):
188+
# not the prettiest, but it works to avoid the NaN values that show up in many columns
189+
if isinstance(i, float) and math.isnan(i):
190+
return None
191+
return dataframe_typemap[col](i)
192+
ak_data[col] = ak_data[col].apply(cast_but_sidestep_nans)
193+
# fix NULLs
184194
ak_data = ak_data.to_numpy(na_value=None).tolist()
185195

186196
# create string of tick-quoted and comma-separated column list

0 commit comments

Comments
 (0)