diff --git a/pilates/activitysim/preprocessor.py b/pilates/activitysim/preprocessor.py index a9b40e2a..ef920d52 100644 --- a/pilates/activitysim/preprocessor.py +++ b/pilates/activitysim/preprocessor.py @@ -112,8 +112,12 @@ def read_skims(settings, mode='a', data_dir=None, file_name='skims.omx'): return skims -def zone_id_to_taz(zones, asim_zone_id_col='TAZ', +def zone_id_to_taz(zones, + asim_zone_id_col='TAZ', default_zone_id_col='zone_id'): + logger.info("Zones table columns: '{0}' with index '{1}', asim zone ID col is '{2}', default is '{3}'.", + ", ".join(zones.columns), zones.index.name, asim_zone_id_col, default_zone_id_col) + if zones.index.name != asim_zone_id_col: if asim_zone_id_col in zones.columns: logger.info("Setting column {0} to index".format(asim_zone_id_col)) @@ -121,14 +125,12 @@ def zone_id_to_taz(zones, asim_zone_id_col='TAZ', elif zones.index.name == default_zone_id_col: logger.info("Renaming index from {0} to {1}".format(default_zone_id_col, asim_zone_id_col)) zones.index.name = asim_zone_id_col - elif asim_zone_id_col not in zones.columns: - logger.info(str(zones.columns)) + elif asim_zone_id_col not in zones.columns and default_zone_id_col in zones.columns: + logger.info("Setting column {0} to index and renaming it {1}".format(default_zone_id_col, asim_zone_id_col)) zones.rename(columns={default_zone_id_col: asim_zone_id_col}, inplace=True) zones.set_index(asim_zone_id_col, inplace=True) - logger.info("Setting column {0} to index and renaming it {1}".format(default_zone_id_col, asim_zone_id_col)) else: - logger.error( - "Not sure what column in the zones table is the zone ID!") + raise KeyError("Not sure what column in the zones table is the zone ID!") else: logger.info("Zone index is already named {0}".format(asim_zone_id_col)) return zones @@ -1838,11 +1840,13 @@ def create_asim_data_from_h5( asim_zone_id_col = 'TAZ' + input_zone_id_col = settings.get("asim_input_zone_id_col", "") # TODO: Generalize this or add it to settings.yaml - if region == "sfbay": - input_zone_id_col = 'taz1454' - else: - input_zone_id_col = 'zone_id' + if not input_zone_id_col: + if region == "sfbay": + input_zone_id_col = 'taz1454' + else: + input_zone_id_col = 'zone_id' # TODO: only call _get_zones_geoms if blocks or colleges or schools # don't already have a zone ID (e.g. TAZ). If they all do then we don't diff --git a/pilates/atlas/postprocessor.py b/pilates/atlas/postprocessor.py index d4346ade..7c62ff71 100644 --- a/pilates/atlas/postprocessor.py +++ b/pilates/atlas/postprocessor.py @@ -35,10 +35,16 @@ def atlas_update_h5_vehicle(settings, output_year, warm_start=False): # read and format atlas vehicle ownership output atlas_output_path = settings['atlas_host_output_folder'] # 'pilates/atlas/atlas_output' # fname = 'householdv_{}.csv'.format(output_year) - df = pd.read_csv(os.path.join(atlas_output_path, fname)) + atlas_output_file_path = os.path.join(atlas_output_path, fname) + + logger.info("Reading atlas output from {0}".format(atlas_output_file_path)) + df = pd.read_csv(atlas_output_file_path) df = df.rename(columns={'nvehicles': 'cars'}).set_index('household_id').sort_index(ascending=True) df['hh_cars'] = pd.cut(df['cars'], bins=[-0.5, 0.5, 1.5, np.inf], labels=['none', 'one', 'two or more']) + # fix for 'Cannot store a category dtype in a HDF5 dataset that uses format="fixed". Use format="table"' + df['hh_cars'] = df['hh_cars'].astype(object) + # set which h5 file to update h5path = settings['usim_local_data_folder'] if warm_start: @@ -59,7 +65,8 @@ def atlas_update_h5_vehicle(settings, output_year, warm_start=False): key = 'households' olddf = h5[key] - if olddf.index.istype(float): + # https://pandas.pydata.org/docs/dev/reference/api/pandas.Index.dtype.html#pandas.Index.dtype + if olddf.index.dtype != int: olddf.index = olddf.index.astype(int) olddf = olddf.reindex(df.index.astype(int)) @@ -120,7 +127,7 @@ def build_beam_vehicles_input(settings, output_year): bestOption = (temp.reset_index()['modelyear'] - modelYear).abs().idxmin() bestYear = temp.reset_index().iloc[bestOption, :]["modelyear"] matched = mapping.loc[(fuelType, bodyType, bestYear, slice(None)), :] - except KeyError: + except (KeyError, ValueError): try: matched = mapping.loc[(fuelType, slice(None), modelYear), :] except KeyError: @@ -129,24 +136,33 @@ def build_beam_vehicles_input(settings, output_year): bestOption = (temp.reset_index()['modelyear'] - modelYear).abs().idxmin() bestYear = temp.reset_index().iloc[bestOption, :]["modelyear"] matched = mapping.loc[(fuelType, slice(None), bestYear), :] - except KeyError: + except (KeyError, ValueError): try: temp = mapping.loc[(slice(None), bodyType, slice(None), slice(None)), :] bestOption = (temp.reset_index()['modelyear'] - modelYear).abs().idxmin() bestYear = temp.reset_index().iloc[bestOption, :]["modelyear"] matched = mapping.loc[(slice(None), bodyType, bestYear), :] - except KeyError: + except (KeyError, ValueError): bestOption = (mapping.reset_index()['modelyear'] - modelYear).abs().idxmin() bestYear = mapping.reset_index().iloc[bestOption, :]["modelyear"] matched = mapping.loc[(slice(None), slice(None), bestYear), :] - createdVehicles = matched.sample(vehiclesSub.shape[0], replace=True, - weights=matched['sampleProbabilityWithinCategory'].values) - createdVehicleCounts = createdVehicles.index.value_counts() - allCounts.loc[createdVehicleCounts.index, 'numberOfVehiclesCreated'] += createdVehicleCounts.values - vehiclesSub['vehicleTypeId'] = createdVehicles.index.get_level_values('vehicleTypeId') - vehiclesSub['stateOfCharge'] = np.nan - allVehicles.append( - vehiclesSub[['household_id', 'vehicleTypeId']]) + try: + createdVehicles = matched.sample(vehiclesSub.shape[0], replace=True, + weights=matched['sampleProbabilityWithinCategory'].values) + createdVehicleCounts = createdVehicles.index.value_counts() + allCounts.loc[createdVehicleCounts.index, 'numberOfVehiclesCreated'] += createdVehicleCounts.values + vehiclesSub['vehicleTypeId'] = createdVehicles.index.get_level_values('vehicleTypeId') + vehiclesSub['stateOfCharge'] = np.nan + allVehicles.append( + vehiclesSub[['household_id', 'vehicleTypeId']]) + except ValueError as we: + # File "..../pilates/atlas/postprocessor.py", line 149, in build_beam_vehicles_input + # createdVehicles = matched.sample(vehiclesSub.shape[0], replace=True, + # File "/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py", line 4959, in sample + # raise ValueError("Invalid weights: weights sum to zero") + # ValueError: Invalid weights: weights sum to zero + logger.error(f"Exception ignored: {we}, 'allVehicles' not extended.") + outputVehicles = pd.concat(allVehicles).reset_index(drop=True) outputVehicles.rename(columns={"household_id": "householdId"}, inplace=True) outputVehicles.index.rename("vehicleId", inplace=True) diff --git a/settings.yaml b/settings.yaml index b5f1acb5..6589b865 100644 --- a/settings.yaml +++ b/settings.yaml @@ -20,7 +20,7 @@ activity_demand_model: activitysim vehicle_ownership_model: atlas # docker or singularity -container_manager: singularity +container_manager: docker #singularity settings singularity_images: urbansim: docker://jdcaicedo/urbansim_demos:v0.1.1 @@ -132,6 +132,7 @@ asim_local_output_folder: pilates/activitysim/output/ asim_local_configs_folder: pilates/activitysim/configs/ asim_validation_folder: pilates/activitysim/validation asim_formattable_command: "-h {0} -n {1} -c {2}" +asim_input_zone_id_col: "zone_id" region_to_asim_subdir: austin: austin detroit: detroit