LBNL-UCB-STI · nikolayilyin · Jun 21, 2024 · Jun 24, 2024 · Jun 25, 2024 · Jun 26, 2024
diff --git a/pilates/activitysim/preprocessor.py b/pilates/activitysim/preprocessor.py
@@ -112,23 +112,25 @@ def read_skims(settings, mode='a', data_dir=None, file_name='skims.omx'):
     return skims
 
 
-def zone_id_to_taz(zones, asim_zone_id_col='TAZ',
+def zone_id_to_taz(zones,
+                   asim_zone_id_col='TAZ',
                    default_zone_id_col='zone_id'):
+    logger.info("Zones table columns: '{0}' with index '{1}', asim zone ID col is '{2}', default is '{3}'.".format(
+        ", ".join(map(str, zones.columns)), zones.index.name, asim_zone_id_col, default_zone_id_col))
+
     if zones.index.name != asim_zone_id_col:
         if asim_zone_id_col in zones.columns:
             logger.info("Setting column {0} to index".format(asim_zone_id_col))
             zones.set_index(asim_zone_id_col, inplace=True)
         elif zones.index.name == default_zone_id_col:
             logger.info("Renaming index from {0} to {1}".format(default_zone_id_col, asim_zone_id_col))
             zones.index.name = asim_zone_id_col
-        elif asim_zone_id_col not in zones.columns:
-            logger.info(str(zones.columns))
+        elif asim_zone_id_col not in zones.columns and default_zone_id_col in zones.columns:
+            logger.info("Setting column {0} to index and renaming it {1}".format(default_zone_id_col, asim_zone_id_col))
             zones.rename(columns={default_zone_id_col: asim_zone_id_col}, inplace=True)
             zones.set_index(asim_zone_id_col, inplace=True)
-            logger.info("Setting column {0} to index and renaming it {1}".format(default_zone_id_col, asim_zone_id_col))
         else:
-            logger.error(
-                "Not sure what column in the zones table is the zone ID!")
+            raise KeyError("Not sure what column in the zones table is the zone ID!")
     else:
         logger.info("Zone index is already named {0}".format(asim_zone_id_col))
     return zones
@@ -1838,11 +1840,13 @@ def create_asim_data_from_h5(
 
     asim_zone_id_col = 'TAZ'
 
+    input_zone_id_col = settings.get("asim_input_zone_id_col", "")
     # TODO: Generalize this or add it to settings.yaml
-    if region == "sfbay":
-        input_zone_id_col = 'taz1454'
-    else:
-        input_zone_id_col = 'zone_id'
+    if not input_zone_id_col:
+        if region == "sfbay":
+            input_zone_id_col = 'taz1454'
+        else:
+            input_zone_id_col = 'zone_id'
 
     # TODO: only call _get_zones_geoms if blocks or colleges or schools
     # don't already have a zone ID (e.g. TAZ). If they all do then we don't

diff --git a/pilates/atlas/postprocessor.py b/pilates/atlas/postprocessor.py
@@ -35,10 +35,16 @@ def atlas_update_h5_vehicle(settings, output_year, warm_start=False):
     # read and format atlas vehicle ownership output
     atlas_output_path = settings['atlas_host_output_folder']  # 'pilates/atlas/atlas_output'  #
     fname = 'householdv_{}.csv'.format(output_year)
-    df = pd.read_csv(os.path.join(atlas_output_path, fname))
+    atlas_output_file_path = os.path.join(atlas_output_path, fname)
+
+    logger.info("Reading atlas output from {0}".format(atlas_output_file_path))
+    df = pd.read_csv(atlas_output_file_path)
     df = df.rename(columns={'nvehicles': 'cars'}).set_index('household_id').sort_index(ascending=True)
     df['hh_cars'] = pd.cut(df['cars'], bins=[-0.5, 0.5, 1.5, np.inf], labels=['none', 'one', 'two or more'])
 
+    # fix for 'Cannot store a category dtype in a HDF5 dataset that uses format="fixed". Use format="table"'
+    df['hh_cars'] = df['hh_cars'].astype(object)
+
     # set which h5 file to update
     h5path = settings['usim_local_data_folder']
     if warm_start:
@@ -59,7 +65,8 @@ def atlas_update_h5_vehicle(settings, output_year, warm_start=False):
             key = 'households'
 
         olddf = h5[key]
-        if olddf.index.istype(float):
+        # https://pandas.pydata.org/docs/dev/reference/api/pandas.Index.dtype.html#pandas.Index.dtype
+        if olddf.index.dtype != int:
             olddf.index = olddf.index.astype(int)
         olddf = olddf.reindex(df.index.astype(int))
 
@@ -120,7 +127,7 @@ def build_beam_vehicles_input(settings, output_year):
                 bestOption = (temp.reset_index()['modelyear'] - modelYear).abs().idxmin()
                 bestYear = temp.reset_index().iloc[bestOption, :]["modelyear"]
                 matched = mapping.loc[(fuelType, bodyType, bestYear, slice(None)), :]
-            except KeyError:
+            except (KeyError, ValueError):
                 try:
                     matched = mapping.loc[(fuelType, slice(None), modelYear), :]
                 except KeyError:
@@ -129,24 +136,33 @@ def build_beam_vehicles_input(settings, output_year):
                         bestOption = (temp.reset_index()['modelyear'] - modelYear).abs().idxmin()
                         bestYear = temp.reset_index().iloc[bestOption, :]["modelyear"]
                         matched = mapping.loc[(fuelType, slice(None), bestYear), :]
-                    except KeyError:
+                    except (KeyError, ValueError):
                         try:
                             temp = mapping.loc[(slice(None), bodyType, slice(None), slice(None)), :]
                             bestOption = (temp.reset_index()['modelyear'] - modelYear).abs().idxmin()
                             bestYear = temp.reset_index().iloc[bestOption, :]["modelyear"]
                             matched = mapping.loc[(slice(None), bodyType, bestYear), :]
-                        except KeyError:
+                        except (KeyError, ValueError):
                             bestOption = (mapping.reset_index()['modelyear'] - modelYear).abs().idxmin()
                             bestYear = mapping.reset_index().iloc[bestOption, :]["modelyear"]
                             matched = mapping.loc[(slice(None), slice(None), bestYear), :]
-        createdVehicles = matched.sample(vehiclesSub.shape[0], replace=True,
-                                         weights=matched['sampleProbabilityWithinCategory'].values)
-        createdVehicleCounts = createdVehicles.index.value_counts()
-        allCounts.loc[createdVehicleCounts.index, 'numberOfVehiclesCreated'] += createdVehicleCounts.values
-        vehiclesSub['vehicleTypeId'] = createdVehicles.index.get_level_values('vehicleTypeId')
-        vehiclesSub['stateOfCharge'] = np.nan
-        allVehicles.append(
-            vehiclesSub[['household_id', 'vehicleTypeId']])
+        try:
+            createdVehicles = matched.sample(vehiclesSub.shape[0], replace=True,
+                                             weights=matched['sampleProbabilityWithinCategory'].values)
+            createdVehicleCounts = createdVehicles.index.value_counts()
+            allCounts.loc[createdVehicleCounts.index, 'numberOfVehiclesCreated'] += createdVehicleCounts.values
+            vehiclesSub['vehicleTypeId'] = createdVehicles.index.get_level_values('vehicleTypeId')
+            vehiclesSub['stateOfCharge'] = np.nan
+            allVehicles.append(
+                vehiclesSub[['household_id', 'vehicleTypeId']])
+        except ValueError as we:
+            # File "..../pilates/atlas/postprocessor.py", line 149, in build_beam_vehicles_input
+            #     createdVehicles = matched.sample(vehiclesSub.shape[0], replace=True,
+            # File "/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py", line 4959, in sample
+            #     raise ValueError("Invalid weights: weights sum to zero")
+            # ValueError: Invalid weights: weights sum to zero
+            logger.error(f"Exception ignored: {we}, 'allVehicles' not extended.")
+
     outputVehicles = pd.concat(allVehicles).reset_index(drop=True)
     outputVehicles.rename(columns={"household_id": "householdId"}, inplace=True)
     outputVehicles.index.rename("vehicleId", inplace=True)

diff --git a/settings.yaml b/settings.yaml
@@ -20,7 +20,7 @@ activity_demand_model: activitysim
 vehicle_ownership_model: atlas
 
 # docker or singularity
-container_manager: singularity
+container_manager: docker
 #singularity settings
 singularity_images:
   urbansim: docker://jdcaicedo/urbansim_demos:v0.1.1
@@ -132,6 +132,7 @@ asim_local_output_folder: pilates/activitysim/output/
 asim_local_configs_folder: pilates/activitysim/configs/
 asim_validation_folder: pilates/activitysim/validation
 asim_formattable_command: "-h {0} -n {1} -c {2}"
+asim_input_zone_id_col: "zone_id"  # if non-empty, overrides the region-based default zone ID column (sfbay: 'taz1454', otherwise 'zone_id')
 region_to_asim_subdir:
   austin: austin
   detroit: detroit